├── src
│   └── LaneDetection
│       ├── models
│       │   ├── __pycache__
│       │   │   └── laneNet_class.cpython-37.pyc
│       │   ├── LaneNet
│       │   │   ├── lanenet_front_end.py
│       │   │   ├── tusimple_lanenet.yaml
│       │   │   ├── lanenet.py
│       │   │   ├── lanenet_discriminative_loss.py
│       │   │   ├── parse_config_utils.py
│       │   │   ├── lanenet_back_end.py
│       │   │   ├── vgg16_based_fcn.py
│       │   │   ├── lanenet_postprocess.py
│       │   │   ├── cnn_basenet.py
│       │   │   └── bisenet_v2.py
│       │   └── laneNet_class.py
│       ├── readMe.md
│       ├── read_img.py
│       └── lane_detection_publisher.py
├── setup.py
├── README.md
├── .gitignore
├── package.xml
├── CMakeLists.txt
└── LICENSE

--------------------------------------------------------------------------------
/src/LaneDetection/models/__pycache__/laneNet_class.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Erendrgnl/Carla-Ros-Lane-Keeping-System/HEAD/src/LaneDetection/models/__pycache__/laneNet_class.cpython-37.pyc

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from distutils.core import setup
from catkin_pkg.python_setup import generate_distutils_setup

d = generate_distutils_setup(
    packages=['LaneDetection',],
    package_dir={'': 'src'}
)
setup(**d)

--------------------------------------------------------------------------------
/src/LaneDetection/readMe.md:
--------------------------------------------------------------------------------
# Lane Detection

## Implemented Models

Model implementation files are located under `models/`.

## LaneNet
[paper](https://arxiv.org/abs/1802.05591)
[link](https://github.com/MaybeShewill-CV/lanenet-lane-detection)

To run inference with the LaneNet model, visit the link below, download the weights, and copy the files into `models/LaneNet/weights`.

[weights](https://www.dropbox.com/sh/0b6r0ljqi76kyg9/AADedYWO3bnx4PhK1BmbJkJKa?dl=0)
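The loader in `models/laneNet_class.py` builds the checkpoint path `models/LaneNet/weights/tusimple_lanenet.ckpt`, so after copying, the weights folder should contain the checkpoint files (names as listed in this repository's `.gitignore`):

```
src/LaneDetection/models/LaneNet/weights/
├── checkpoint
├── tusimple_lanenet.ckpt.data-00000-of-00001
├── tusimple_lanenet.ckpt.index
└── tusimple_lanenet.ckpt.meta
```

Once the weights are in place, the model can be used the same way `lane_detection_publisher.py` uses it; a minimal sketch, assuming `rgb_image` is an RGB `numpy` array holding a front-camera frame:

```python
from LaneDetection.models import laneNet_class

model = laneNet_class.LaneNet()                     # builds the TF graph and restores the checkpoint
prediction, lane_center = model.predict(rgb_image)  # lane overlay image and lane-center points
```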
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Carla ROS Lane Keeping System

Use the Carla simulator and the ROS bridge to implement a lane keeping system.

![laneNet](https://user-images.githubusercontent.com/37477289/140699703-68a6fbb8-b398-48e2-a534-553caf1f5c84.gif)

## Environment

Tested with CARLA 0.9.12. See the following link for the matching Carla release:
[link](https://github.com/carla-simulator/carla/releases/tag/0.9.12)

ROS bridge:
[link](https://github.com/carla-simulator/ros-bridge)


### What will be added
- [✓] Lane detection algorithm
- [✓] ROS nodes updated
- [ ] MPC controller implementation
- [ ] Kalman filter

--------------------------------------------------------------------------------
/src/LaneDetection/read_img.py:
--------------------------------------------------------------------------------
import rospy
import numpy as np
from sensor_msgs.msg import Image
import cv2
from threading import Thread


def callback(image):
    byte_image = image.data
    np_image = np.frombuffer(byte_image, dtype=np.uint8)
    bgra_image = np_image.reshape((image.height, image.width, 4))
    bgr_image = cv2.cvtColor(bgra_image, cv2.COLOR_BGRA2BGR)

    cv2.imshow("Camera Front", bgr_image)
    cv2.waitKey(10)

if __name__ == "__main__":
    rospy.init_node('camera', anonymous=True)

    rospy.Subscriber("/lka/detected_image", Image, callback)

    # Pass the callable itself: writing rospy.spin() here would block
    # immediately and hand the thread its None return value as target.
    Thread(target=rospy.spin).start()

--------------------------------------------------------------------------------
/src/LaneDetection/models/LaneNet/lanenet_front_end.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 19-4-24 下午3:53
# @Author : MaybeShewill-CV
# @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection
# @File : lanenet_front_end.py
# @IDE: PyCharm
"""
LaneNet frontend branch which is mainly used for feature extraction
"""
import cnn_basenet
import vgg16_based_fcn
import bisenet_v2


class LaneNetFrondEnd(cnn_basenet.CNNBaseModel):
    """
    LaneNet frontend which is used to extract image features for following process
    """
    def __init__(self, phase, net_flag, cfg):
        """

        """
        super(LaneNetFrondEnd, self).__init__()
        self._cfg = cfg

        self._frontend_net_map = {
            'vgg': vgg16_based_fcn.VGG16FCN(phase=phase, cfg=self._cfg),
            'bisenetv2': bisenet_v2.BiseNetV2(phase=phase, cfg=self._cfg),
        }

        self._net = self._frontend_net_map[net_flag]

    def build_model(self, input_tensor, name, reuse):
        """

        :param input_tensor:
        :param name:
        :param reuse:
        :return:
        """

        return self._net.build_model(
            input_tensor=input_tensor,
            name=name,
            reuse=reuse
        )

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.vscode/settings.json
.vscode/c_cpp_properties.json
src/LaneDetection/models/LaneNet/__pycache__/bisenet_v2.cpython-37.pyc
src/LaneDetection/models/LaneNet/__pycache__/cnn_basenet.cpython-37.pyc
src/LaneDetection/models/LaneNet/__pycache__/inferance.cpython-37.pyc
src/LaneDetection/models/LaneNet/__pycache__/lanenet_back_end.cpython-37.pyc
src/LaneDetection/models/LaneNet/__pycache__/lanenet_discriminative_loss.cpython-37.pyc
src/LaneDetection/models/LaneNet/__pycache__/lanenet_front_end.cpython-37.pyc
src/LaneDetection/models/LaneNet/__pycache__/lanenet.cpython-37.pyc
src/LaneDetection/models/LaneNet/__pycache__/parse_config_utils.cpython-37.pyc
src/LaneDetection/models/LaneNet/__pycache__/vgg16_based_fcn.cpython-37.pyc
src/LaneDetection/models/LaneNet/weights/checkpoint
src/LaneDetection/models/LaneNet/weights/tusimple_lanenet.ckpt.data-00000-of-00001
src/LaneDetection/models/LaneNet/weights/tusimple_lanenet.ckpt.index
src/LaneDetection/models/LaneNet/weights/tusimple_lanenet.ckpt.meta
src/LaneDetection/models/__pycache__/laneNet_class.cpython-37.pyc
src/LaneDetection/__pycache__/lane_detection_publisher.cpython-37.pyc

--------------------------------------------------------------------------------
/src/LaneDetection/lane_detection_publisher.py:
--------------------------------------------------------------------------------
import rospy
import numpy as np
from sensor_msgs.msg import Image
import cv2

class LaneDetection(object):
    def __init__(self, model):
        self.model = model
        rospy.init_node('camera', anonymous=True)
        rospy.Subscriber("/carla/ego_vehicle/rgb_front/image", Image, self.callback)
        self.pub = rospy.Publisher("/lka/detected_image", Image, queue_size=10)
        rospy.spin()

    def callback(self, raw_image):
        byte_image = raw_image.data
        np_image = np.frombuffer(byte_image, dtype=np.uint8)
        bgra_image = np_image.reshape((raw_image.height, raw_image.width, 4))
        rgb_image = cv2.cvtColor(bgra_image, cv2.COLOR_BGRA2RGB)

        publish_image = Image()
        publish_image.header = raw_image.header
        publish_image.is_bigendian = raw_image.is_bigendian
        publish_image.encoding = raw_image.encoding

        prediction, lane_center = self.model.predict(rgb_image)
        publish_image.height = prediction.shape[0]
        publish_image.width = prediction.shape[1]
        # bytes per row: bgra8 uses 4 bytes per pixel
        publish_image.step = publish_image.width * 4

        prediction = cv2.cvtColor(prediction, cv2.COLOR_RGB2BGRA).astype(np.uint8)
        byte_data = prediction.tobytes()
        publish_image.data = byte_data

        self.pub.publish(publish_image)


if __name__ == "__main__":
    from LaneDetection.models import laneNet_class

    model = laneNet_class.LaneNet()
    ros_node = LaneDetection(model)

--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<package format="2">
  <name>lka</name>
  <version>0.0.0</version>
  <description>The lka package</description>
  <maintainer email="eren@todo.todo">eren</maintainer>
  <license>TODO</license>
  <buildtool_depend>catkin</buildtool_depend>
  <build_depend>roscpp</build_depend>
  <build_depend>rospy</build_depend>
  <build_depend>std_msgs</build_depend>
  <build_export_depend>roscpp</build_export_depend>
  <build_export_depend>rospy</build_export_depend>
  <build_export_depend>std_msgs</build_export_depend>
  <exec_depend>roscpp</exec_depend>
  <exec_depend>rospy</exec_depend>
  <exec_depend>std_msgs</exec_depend>

  <export>
  </export>
</package>

--------------------------------------------------------------------------------
/src/LaneDetection/models/LaneNet/tusimple_lanenet.yaml:
--------------------------------------------------------------------------------
AUG:
    RESIZE_METHOD: 'stepscaling' # choice unpadding rangescaling and stepscaling
    FIX_RESIZE_SIZE: [720, 720] # (width, height), for unpadding
    INF_RESIZE_VALUE: 500 # for rangescaling
    MAX_RESIZE_VALUE: 600 # for rangescaling
    MIN_RESIZE_VALUE: 400 # for rangescaling
    MAX_SCALE_FACTOR: 2.0 # for stepscaling
    MIN_SCALE_FACTOR: 0.75 # for stepscaling
    SCALE_STEP_SIZE: 0.25 # for stepscaling
    TRAIN_CROP_SIZE: [512, 256] # crop size for training
    EVAL_CROP_SIZE: [512, 256] # crop size for evaluating
    CROP_PAD_SIZE: 32
    MIRROR: True
    FLIP: False
    FLIP_RATIO: 0.5
    RICH_CROP:
        ENABLE: False
        BLUR: True
        BLUR_RATIO: 0.2
        MAX_ROTATION: 15
        MIN_AREA_RATIO: 0.5
        ASPECT_RATIO: 0.5
        BRIGHTNESS_JITTER_RATIO: 0.5
        CONTRAST_JITTER_RATIO: 0.5
        SATURATION_JITTER_RATIO: 0.5
DATASET:
    DATA_DIR: 'REPO_ROOT_PATH/data/training_data_example/'
    IMAGE_TYPE: 'rgb' # choice rgb or rgba
    NUM_CLASSES: 2
    TEST_FILE_LIST: 'REPO_ROOT_PATH/data/training_data_example/test.txt'
    TRAIN_FILE_LIST: 'REPO_ROOT_PATH/data/training_data_example/train.txt'
    VAL_FILE_LIST: 'REPO_ROOT_PATH/data/training_data_example/val.txt'
    IGNORE_INDEX: 255
    PADDING_VALUE: [127.5, 127.5, 127.5]
    MEAN_VALUE: [0.5, 0.5, 0.5]
    STD_VALUE: [0.5, 0.5, 0.5]
    CPU_MULTI_PROCESS_NUMS: 8
FREEZE:
    MODEL_FILENAME: 'model'
    PARAMS_FILENAME: 'params'
MODEL:
    MODEL_NAME: 'lanenet'
    FRONT_END: 'bisenetv2'
    EMBEDDING_FEATS_DIMS: 4
    BISENETV2:
        GE_EXPAND_RATIO: 6
        SEMANTIC_CHANNEL_LAMBDA: 0.25
        SEGHEAD_CHANNEL_EXPAND_RATIO: 2
TEST:
    TEST_MODEL: 'model/cityscapes/final'
TRAIN:
    MODEL_SAVE_DIR: 'model/tusimple/'
    TBOARD_SAVE_DIR: 'tboard/tusimple/'
    MODEL_PARAMS_CONFIG_FILE_NAME: "model_train_config.json"
    RESTORE_FROM_SNAPSHOT:
        ENABLE: False
        SNAPSHOT_PATH: ''
    SNAPSHOT_EPOCH: 8
    BATCH_SIZE: 32
    VAL_BATCH_SIZE: 4
    EPOCH_NUMS: 905
    WARM_UP:
        ENABLE: True
        EPOCH_NUMS: 8
    FREEZE_BN:
        ENABLE: False
    COMPUTE_MIOU:
        ENABLE: True
        EPOCH: 1
    MULTI_GPU:
        ENABLE: True
        GPU_DEVICES: ['0', '1']
        CHIEF_DEVICE_INDEX: 0
SOLVER:
    LR: 0.001
    LR_POLICY: 'poly'
    LR_POLYNOMIAL_POWER: 0.9
    OPTIMIZER: 'sgd'
    MOMENTUM: 0.9
    WEIGHT_DECAY: 0.0005
    MOVING_AVE_DECAY: 0.9995
    LOSS_TYPE: 'cross_entropy'
    OHEM:
        ENABLE: False
        SCORE_THRESH: 0.65
        MIN_SAMPLE_NUMS: 65536
GPU:
    GPU_MEMORY_FRACTION: 0.9
    TF_ALLOW_GROWTH: True
POSTPROCESS:
    MIN_AREA_THRESHOLD: 100
    DBSCAN_EPS: 0.35
    DBSCAN_MIN_SAMPLES: 1000
LOG:
    SAVE_DIR: './log'
    LEVEL: INFO

--------------------------------------------------------------------------------
/src/LaneDetection/models/LaneNet/lanenet.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 19-4-24 下午8:50
# @Author : MaybeShewill-CV
# @Site 
: https://github.com/MaybeShewill-CV/lanenet-lane-detection 6 | # @File : lanenet.py 7 | # @IDE: PyCharm 8 | """ 9 | Implement LaneNet Model 10 | """ 11 | import tensorflow as tf 12 | 13 | import lanenet_back_end 14 | import lanenet_front_end 15 | import cnn_basenet 16 | 17 | 18 | class LaneNet(cnn_basenet.CNNBaseModel): 19 | """ 20 | 21 | """ 22 | def __init__(self, phase, cfg): 23 | """ 24 | 25 | """ 26 | super(LaneNet, self).__init__() 27 | self._cfg = cfg 28 | self._net_flag = self._cfg.MODEL.FRONT_END 29 | 30 | self._frontend = lanenet_front_end.LaneNetFrondEnd( 31 | phase=phase, net_flag=self._net_flag, cfg=self._cfg 32 | ) 33 | self._backend = lanenet_back_end.LaneNetBackEnd( 34 | phase=phase, cfg=self._cfg 35 | ) 36 | 37 | def inference(self, input_tensor, name, reuse=False): 38 | """ 39 | 40 | :param input_tensor: 41 | :param name: 42 | :param reuse 43 | :return: 44 | """ 45 | with tf.variable_scope(name_or_scope=name, reuse=reuse): 46 | # first extract image features 47 | extract_feats_result = self._frontend.build_model( 48 | input_tensor=input_tensor, 49 | name='{:s}_frontend'.format(self._net_flag), 50 | reuse=reuse 51 | ) 52 | 53 | # second apply backend process 54 | binary_seg_prediction, instance_seg_prediction = self._backend.inference( 55 | binary_seg_logits=extract_feats_result['binary_segment_logits']['data'], 56 | instance_seg_logits=extract_feats_result['instance_segment_logits']['data'], 57 | name='{:s}_backend'.format(self._net_flag), 58 | reuse=reuse 59 | ) 60 | 61 | return binary_seg_prediction, instance_seg_prediction 62 | 63 | def compute_loss(self, input_tensor, binary_label, instance_label, name, reuse=False): 64 | """ 65 | calculate lanenet loss for training 66 | :param input_tensor: 67 | :param binary_label: 68 | :param instance_label: 69 | :param name: 70 | :param reuse: 71 | :return: 72 | """ 73 | with tf.variable_scope(name_or_scope=name, reuse=reuse): 74 | # first extract image features 75 | extract_feats_result = self._frontend.build_model( 76 | input_tensor=input_tensor, 77 | name='{:s}_frontend'.format(self._net_flag), 78 | reuse=reuse 79 | ) 80 | 81 | # second apply backend process 82 | calculated_losses = self._backend.compute_loss( 83 | binary_seg_logits=extract_feats_result['binary_segment_logits']['data'], 84 | binary_label=binary_label, 85 | instance_seg_logits=extract_feats_result['instance_segment_logits']['data'], 86 | instance_label=instance_label, 87 | name='{:s}_backend'.format(self._net_flag), 88 | reuse=reuse 89 | ) 90 | 91 | return calculated_losses 92 | -------------------------------------------------------------------------------- /src/LaneDetection/models/laneNet_class.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | import os 5 | import sys 6 | 7 | ROOT_PATH = os.path.dirname(os.path.abspath(__file__)) 8 | sys.path.append(os.path.join(ROOT_PATH,"LaneNet")) 9 | 10 | import lanenet 11 | #from lanenet_model import lanenet_postprocess 12 | import parse_config_utils 13 | 14 | 15 | class LaneNet(object): 16 | def __init__(self): 17 | self.cfg = parse_config_utils.lanenet_cfg 18 | self.input_tensor = tf.placeholder(dtype=tf.float32, shape=[1, 256, 512, 3], name='input_tensor') 19 | self.net = lanenet.LaneNet(phase='test', cfg=self.cfg) 20 | self.binary_seg_ret, self.instance_seg_ret = self.net.inference(input_tensor=self.input_tensor, name='LaneNet') 21 | 22 | 23 | self.weights_path = 
os.path.join(ROOT_PATH, "LaneNet", "weights/tusimple_lanenet.ckpt")

        # Set sess configuration
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.per_process_gpu_memory_fraction = self.cfg.GPU.GPU_MEMORY_FRACTION
        sess_config.gpu_options.allow_growth = self.cfg.GPU.TF_ALLOW_GROWTH
        sess_config.gpu_options.allocator_type = 'BFC'

        self.sess = tf.Session(config=sess_config)

        # define moving average version of the learned variables for eval
        with tf.variable_scope(name_or_scope='moving_avg'):
            variable_averages = tf.train.ExponentialMovingAverage(
                self.cfg.SOLVER.MOVING_AVE_DECAY)
            variables_to_restore = variable_averages.variables_to_restore()

        self.saver = tf.train.Saver(variables_to_restore)
        self.saver.restore(sess=self.sess, save_path=self.weights_path)

        print("LaneNet Model Initialized")

    @staticmethod
    def preProcessing(image):
        image = cv2.resize(image, (512, 256), interpolation=cv2.INTER_LINEAR)
        image = image / 127.5 - 1.0
        return image


    def predict(self, image):
        src_image = self.preProcessing(image)

        with self.sess.as_default():
            self.binary_seg_image, self.instance_seg_image = self.sess.run(
                [self.binary_seg_ret, self.instance_seg_ret],
                feed_dict={self.input_tensor: [src_image]}
            )
        rgb = self.instance_seg_image[0].astype(np.uint8)
        bw = self.binary_seg_image[0].astype(np.uint8)
        res = cv2.bitwise_and(rgb, rgb, mask=bw)

        lanes_rgb, center_xy = self.postProcess(res)
        return lanes_rgb, center_xy

    def postProcess(self, image):
        src_img = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

        # Suppress strongly red pixels before splitting into left/right lanes
        red_mask = (src_img[:, :, 2] > 200).astype(np.uint8)
        src_img = cv2.bitwise_and(src_img, src_img, mask=1 - red_mask)

        # Right lanes
        green_mask = (src_img[:, :, 1] > 200).astype(np.uint8)
        green_area = cv2.bitwise_and(src_img, src_img, mask=green_mask)

        # Left lanes
        blue_mask = (src_img[:, :, 0] > 200).astype(np.uint8)
        blue_area = cv2.bitwise_and(src_img, src_img, mask=blue_mask)

        lanes_rgb = cv2.addWeighted(green_area, 1, blue_area, 1, 0)

        img_center_point, center_xy = self.window_search(green_mask, blue_mask)
        lanes_rgb = cv2.addWeighted(lanes_rgb, 1, img_center_point, 1, 0)

        return lanes_rgb, center_xy

    @staticmethod
    def window_search(right_lane, left_lane):
        center_coordinates = []
        out = np.zeros(right_lane.shape, np.uint8)
        out = cv2.merge((out, out, out))

        mid_point = np.int(right_lane.shape[1] / 2)

        # Slide nwindows rows over the lower half of the image and estimate
        # the lane center from the left/right lane histograms in each row
        nwindows = 9
        h = right_lane.shape[0]
        vp = int(h / 2)
        window_height = np.int(vp / nwindows)

        r_lane = right_lane[vp:, :].copy()
        r_lane = cv2.erode(r_lane, np.ones((3, 3)))

        l_lane = left_lane[vp:, :]
        l_lane = cv2.erode(l_lane, np.ones((3, 3)))

        for window in range(nwindows):
            win_y_low = vp - (window + 1) * window_height
            win_y_high = vp - window * window_height
            win_y_center = win_y_low + int((win_y_high - win_y_low) / 2)

            r_row = r_lane[win_y_low:win_y_high, :]
            l_row = l_lane[win_y_low:win_y_high, :]

            histogram = np.sum(r_row, axis=0)
            r_point = np.argmax(histogram)

            histogram = np.sum(l_row, axis=0)
            l_point = np.argmax(histogram)

            # Keep the window only if both lanes were found and sit roughly
            # symmetric around the image mid-point
            if (l_point != 0) and (r_point != 0):
                rd = r_point - mid_point
                ld = mid_point - l_point
                if abs(rd - ld) < 100:
                    center = l_point + int((r_point - l_point) / 2)
                    out = 
cv2.circle(out,(center,vp+win_y_center),2,(0,0,255),-1) 126 | center_coordinates.append((center,vp+win_y_center)) 127 | return out,center_coordinates -------------------------------------------------------------------------------- /src/LaneDetection/models/LaneNet/lanenet_discriminative_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Time : 18-5-11 下午3:48 4 | # @Author : MaybeShewill-CV 5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection 6 | # @File : lanenet_discriminative_loss.py 7 | # @IDE: PyCharm Community Edition 8 | """ 9 | Discriminative Loss for instance segmentation 10 | """ 11 | import tensorflow as tf 12 | 13 | 14 | def discriminative_loss_single( 15 | prediction, 16 | correct_label, 17 | feature_dim, 18 | label_shape, 19 | delta_v, 20 | delta_d, 21 | param_var, 22 | param_dist, 23 | param_reg): 24 | """ 25 | discriminative loss 26 | :param prediction: inference of network 27 | :param correct_label: instance label 28 | :param feature_dim: feature dimension of prediction 29 | :param label_shape: shape of label 30 | :param delta_v: cut off variance distance 31 | :param delta_d: cut off cluster distance 32 | :param param_var: weight for intra cluster variance 33 | :param param_dist: weight for inter cluster distances 34 | :param param_reg: weight regularization 35 | """ 36 | correct_label = tf.reshape( 37 | correct_label, [label_shape[1] * label_shape[0]] 38 | ) 39 | reshaped_pred = tf.reshape( 40 | prediction, [label_shape[1] * label_shape[0], feature_dim] 41 | ) 42 | 43 | # calculate instance nums 44 | unique_labels, unique_id, counts = tf.unique_with_counts(correct_label) 45 | counts = tf.cast(counts, tf.float32) 46 | num_instances = tf.size(unique_labels) 47 | 48 | # calculate instance pixel embedding mean vec 49 | segmented_sum = tf.unsorted_segment_sum( 50 | reshaped_pred, unique_id, num_instances) 51 | mu = tf.div(segmented_sum, tf.reshape(counts, (-1, 1))) 52 | mu_expand = tf.gather(mu, unique_id) 53 | 54 | distance = tf.norm(tf.subtract(mu_expand, reshaped_pred), axis=1, ord=1) 55 | distance = tf.subtract(distance, delta_v) 56 | distance = tf.clip_by_value(distance, 0., distance) 57 | distance = tf.square(distance) 58 | 59 | l_var = tf.unsorted_segment_sum(distance, unique_id, num_instances) 60 | l_var = tf.div(l_var, counts) 61 | l_var = tf.reduce_sum(l_var) 62 | l_var = tf.divide(l_var, tf.cast(num_instances, tf.float32)) 63 | 64 | mu_interleaved_rep = tf.tile(mu, [num_instances, 1]) 65 | mu_band_rep = tf.tile(mu, [1, num_instances]) 66 | mu_band_rep = tf.reshape( 67 | mu_band_rep, 68 | (num_instances * 69 | num_instances, 70 | feature_dim)) 71 | 72 | mu_diff = tf.subtract(mu_band_rep, mu_interleaved_rep) 73 | 74 | intermediate_tensor = tf.reduce_sum(tf.abs(mu_diff), axis=1) 75 | zero_vector = tf.zeros(1, dtype=tf.float32) 76 | bool_mask = tf.not_equal(intermediate_tensor, zero_vector) 77 | mu_diff_bool = tf.boolean_mask(mu_diff, bool_mask) 78 | 79 | mu_norm = tf.norm(mu_diff_bool, axis=1, ord=1) 80 | mu_norm = tf.subtract(2. * delta_d, mu_norm) 81 | mu_norm = tf.clip_by_value(mu_norm, 0., mu_norm) 82 | mu_norm = tf.square(mu_norm) 83 | 84 | l_dist = tf.reduce_mean(mu_norm) 85 | 86 | l_reg = tf.reduce_mean(tf.norm(mu, axis=1, ord=1)) 87 | 88 | param_scale = 1. 
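    # The three weighted terms below are combined as
    #   loss = param_scale * (param_var * l_var + param_dist * l_dist + param_reg * l_reg)
    # l_var pulls pixel embeddings towards their instance mean, l_dist pushes
    # the means of different instances apart, and l_reg keeps the means small.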
89 | l_var = param_var * l_var 90 | l_dist = param_dist * l_dist 91 | l_reg = param_reg * l_reg 92 | 93 | loss = param_scale * (l_var + l_dist + l_reg) 94 | 95 | return loss, l_var, l_dist, l_reg 96 | 97 | 98 | def discriminative_loss(prediction, correct_label, feature_dim, image_shape, 99 | delta_v, delta_d, param_var, param_dist, param_reg): 100 | """ 101 | 102 | :return: discriminative loss and its three components 103 | """ 104 | 105 | def cond(label, batch, out_loss, out_var, out_dist, out_reg, i): 106 | return tf.less(i, tf.shape(batch)[0]) 107 | 108 | def body(label, batch, out_loss, out_var, out_dist, out_reg, i): 109 | disc_loss, l_var, l_dist, l_reg = discriminative_loss_single( 110 | prediction[i], correct_label[i], feature_dim, image_shape, delta_v, delta_d, param_var, param_dist, param_reg) 111 | 112 | out_loss = out_loss.write(i, disc_loss) 113 | out_var = out_var.write(i, l_var) 114 | out_dist = out_dist.write(i, l_dist) 115 | out_reg = out_reg.write(i, l_reg) 116 | 117 | return label, batch, out_loss, out_var, out_dist, out_reg, i + 1 118 | 119 | # TensorArray is a data structure that support dynamic writing 120 | output_ta_loss = tf.TensorArray( 121 | dtype=tf.float32, size=0, dynamic_size=True) 122 | output_ta_var = tf.TensorArray( 123 | dtype=tf.float32, size=0, dynamic_size=True) 124 | output_ta_dist = tf.TensorArray( 125 | dtype=tf.float32, size=0, dynamic_size=True) 126 | output_ta_reg = tf.TensorArray( 127 | dtype=tf.float32, size=0, dynamic_size=True) 128 | 129 | _, _, out_loss_op, out_var_op, out_dist_op, out_reg_op, _ = tf.while_loop( 130 | cond, body, [ 131 | correct_label, prediction, output_ta_loss, output_ta_var, output_ta_dist, output_ta_reg, 0]) 132 | out_loss_op = out_loss_op.stack() 133 | out_var_op = out_var_op.stack() 134 | out_dist_op = out_dist_op.stack() 135 | out_reg_op = out_reg_op.stack() 136 | 137 | disc_loss = tf.reduce_mean(out_loss_op) 138 | l_var = tf.reduce_mean(out_var_op) 139 | l_dist = tf.reduce_mean(out_dist_op) 140 | l_reg = tf.reduce_mean(out_reg_op) 141 | 142 | return disc_loss, l_var, l_dist, l_reg 143 | -------------------------------------------------------------------------------- /src/LaneDetection/models/LaneNet/parse_config_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2019/12/13 上午11:17 4 | # @Author : PaddlePaddle 5 | # @Site : https://github.com/PaddlePaddle/PaddleSeg 6 | # @File : parse_config_utils.py 7 | # @IDE: PyCharm 8 | """ 9 | Parse config utils 10 | """ 11 | import os 12 | import yaml 13 | import json 14 | import codecs 15 | from ast import literal_eval 16 | 17 | 18 | class Config(dict): 19 | """ 20 | Config class 21 | """ 22 | def __init__(self, *args, **kwargs): 23 | """ 24 | init class 25 | :param args: 26 | :param kwargs: 27 | """ 28 | if 'config_path' in kwargs: 29 | config_content = self._load_config_file(kwargs['config_path']) 30 | super(Config, self).__init__(config_content) 31 | else: 32 | super(Config, self).__init__(*args, **kwargs) 33 | self.immutable = False 34 | 35 | def __setattr__(self, key, value, create_if_not_exist=True): 36 | """ 37 | 38 | :param key: 39 | :param value: 40 | :param create_if_not_exist: 41 | :return: 42 | """ 43 | if key in ["immutable"]: 44 | self.__dict__[key] = value 45 | return 46 | 47 | t = self 48 | keylist = key.split(".") 49 | for k in keylist[:-1]: 50 | t = t.__getattr__(k, create_if_not_exist) 51 | 52 | t.__getattr__(keylist[-1], create_if_not_exist) 53 | 
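        # The loop above walked down the nested Config nodes (creating them if
        # needed) for a dotted key such as "MODEL.FRONT_END"; assign the leaf here.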
t[keylist[-1]] = value 54 | 55 | def __getattr__(self, key, create_if_not_exist=True): 56 | """ 57 | 58 | :param key: 59 | :param create_if_not_exist: 60 | :return: 61 | """ 62 | if key in ["immutable"]: 63 | return self.__dict__[key] 64 | 65 | if key not in self: 66 | if not create_if_not_exist: 67 | raise KeyError 68 | self[key] = Config() 69 | if isinstance(self[key], dict): 70 | self[key] = Config(self[key]) 71 | return self[key] 72 | 73 | def __setitem__(self, key, value): 74 | """ 75 | 76 | :param key: 77 | :param value: 78 | :return: 79 | """ 80 | if self.immutable: 81 | raise AttributeError( 82 | 'Attempted to set "{}" to "{}", but SegConfig is immutable'. 83 | format(key, value)) 84 | # 85 | if isinstance(value, str): 86 | try: 87 | value = literal_eval(value) 88 | except ValueError: 89 | pass 90 | except SyntaxError: 91 | pass 92 | super(Config, self).__setitem__(key, value) 93 | 94 | @staticmethod 95 | def _load_config_file(config_file_path): 96 | """ 97 | 98 | :param config_file_path 99 | :return: 100 | """ 101 | if not os.access(config_file_path, os.R_OK): 102 | raise OSError('Config file: {:s}, can not be read'.format(config_file_path)) 103 | with open(config_file_path, 'r') as f: 104 | config_content = yaml.safe_load(f) 105 | 106 | return config_content 107 | 108 | def update_from_config(self, other): 109 | """ 110 | 111 | :param other: 112 | :return: 113 | """ 114 | if isinstance(other, dict): 115 | other = Config(other) 116 | assert isinstance(other, Config) 117 | diclist = [("", other)] 118 | while len(diclist): 119 | prefix, tdic = diclist[0] 120 | diclist = diclist[1:] 121 | for key, value in tdic.items(): 122 | key = "{}.{}".format(prefix, key) if prefix else key 123 | if isinstance(value, dict): 124 | diclist.append((key, value)) 125 | continue 126 | try: 127 | self.__setattr__(key, value, create_if_not_exist=False) 128 | except KeyError: 129 | raise KeyError('Non-existent config key: {}'.format(key)) 130 | 131 | def check_and_infer(self): 132 | """ 133 | 134 | :return: 135 | """ 136 | if self.DATASET.IMAGE_TYPE in ['rgb', 'gray']: 137 | self.DATASET.DATA_DIM = 3 138 | elif self.DATASET.IMAGE_TYPE in ['rgba']: 139 | self.DATASET.DATA_DIM = 4 140 | else: 141 | raise KeyError( 142 | 'DATASET.IMAGE_TYPE config error, only support `rgb`, `gray` and `rgba`' 143 | ) 144 | if self.MEAN is not None: 145 | self.DATASET.PADDING_VALUE = [x * 255.0 for x in self.MEAN] 146 | 147 | if not self.TRAIN_CROP_SIZE: 148 | raise ValueError( 149 | 'TRAIN_CROP_SIZE is empty! Please set a pair of values in format (width, height)' 150 | ) 151 | 152 | if not self.EVAL_CROP_SIZE: 153 | raise ValueError( 154 | 'EVAL_CROP_SIZE is empty! Please set a pair of values in format (width, height)' 155 | ) 156 | 157 | # Ensure file list is use UTF-8 encoding 158 | train_sets = codecs.open(self.DATASET.TRAIN_FILE_LIST, 'r', 'utf-8').readlines() 159 | val_sets = codecs.open(self.DATASET.VAL_FILE_LIST, 'r', 'utf-8').readlines() 160 | test_sets = codecs.open(self.DATASET.TEST_FILE_LIST, 'r', 'utf-8').readlines() 161 | self.DATASET.TRAIN_TOTAL_IMAGES = len(train_sets) 162 | self.DATASET.VAL_TOTAL_IMAGES = len(val_sets) 163 | self.DATASET.TEST_TOTAL_IMAGES = len(test_sets) 164 | 165 | if self.MODEL.MODEL_NAME == 'icnet' and \ 166 | len(self.MODEL.MULTI_LOSS_WEIGHT) != 3: 167 | self.MODEL.MULTI_LOSS_WEIGHT = [1.0, 0.4, 0.16] 168 | 169 | def update_from_list(self, config_list): 170 | if len(config_list) % 2 != 0: 171 | raise ValueError( 172 | "Command line options config format error! Please check it: {}". 
173 | format(config_list)) 174 | for key, value in zip(config_list[0::2], config_list[1::2]): 175 | try: 176 | self.__setattr__(key, value, create_if_not_exist=False) 177 | except KeyError: 178 | raise KeyError('Non-existent config key: {}'.format(key)) 179 | 180 | def update_from_file(self, config_file): 181 | """ 182 | 183 | :param config_file: 184 | :return: 185 | """ 186 | with codecs.open(config_file, 'r', 'utf-8') as f: 187 | dic = yaml.safe_load(f) 188 | self.update_from_config(dic) 189 | 190 | def set_immutable(self, immutable): 191 | """ 192 | 193 | :param immutable: 194 | :return: 195 | """ 196 | self.immutable = immutable 197 | for value in self.values(): 198 | if isinstance(value, Config): 199 | value.set_immutable(immutable) 200 | 201 | def is_immutable(self): 202 | """ 203 | 204 | :return: 205 | """ 206 | return self.immutable 207 | 208 | def dump_to_json_file(self, f_obj): 209 | """ 210 | 211 | :param f_obj: 212 | :return: 213 | """ 214 | origin_dict = dict() 215 | for key, val in self.items(): 216 | if isinstance(val, Config): 217 | origin_dict.update({key: dict(val)}) 218 | elif isinstance(val, dict): 219 | origin_dict.update({key: val}) 220 | else: 221 | raise TypeError('Not supported type {}'.format(type(val))) 222 | return json.dump(origin_dict, f_obj) 223 | 224 | r_path = os.path.dirname(os.path.abspath(__file__)) 225 | file_path = os.path.join(r_path,'tusimple_lanenet.yaml') 226 | lanenet_cfg = Config(config_path=file_path) 227 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.3) 2 | project(lka) 3 | 4 | ## Compile as C++11, supported in ROS Kinetic and newer 5 | # add_compile_options(-std=c++11) 6 | 7 | ## Find catkin macros and libraries 8 | ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) 9 | ## is used, also find other catkin packages 10 | find_package(catkin REQUIRED COMPONENTS 11 | roscpp 12 | rospy 13 | std_msgs 14 | ) 15 | 16 | ## System dependencies are found with CMake's conventions 17 | # find_package(Boost REQUIRED COMPONENTS system) 18 | 19 | 20 | ## Uncomment this if the package has a setup.py. This macro ensures 21 | ## modules and global scripts declared therein get installed 22 | ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html 23 | catkin_python_setup() 24 | 25 | ################################################ 26 | ## Declare ROS messages, services and actions ## 27 | ################################################ 28 | 29 | ## To declare and build messages, services or actions from within this 30 | ## package, follow these steps: 31 | ## * Let MSG_DEP_SET be the set of packages whose message types you use in 32 | ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). 33 | ## * In the file package.xml: 34 | ## * add a build_depend tag for "message_generation" 35 | ## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET 36 | ## * If MSG_DEP_SET isn't empty the following dependency has been pulled in 37 | ## but can be declared for certainty nonetheless: 38 | ## * add a exec_depend tag for "message_runtime" 39 | ## * In this file (CMakeLists.txt): 40 | ## * add "message_generation" and every package in MSG_DEP_SET to 41 | ## find_package(catkin REQUIRED COMPONENTS ...) 42 | ## * add "message_runtime" and every package in MSG_DEP_SET to 43 | ## catkin_package(CATKIN_DEPENDS ...) 
##   * uncomment the add_*_files sections below as needed
##     and list every .msg/.srv/.action file to be processed
##   * uncomment the generate_messages entry below
##   * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)

## Generate messages in the 'msg' folder
# add_message_files(
#   FILES
#   Message1.msg
#   Message2.msg
# )

## Generate services in the 'srv' folder
# add_service_files(
#   FILES
#   Service1.srv
#   Service2.srv
# )

## Generate actions in the 'action' folder
# add_action_files(
#   FILES
#   Action1.action
#   Action2.action
# )

## Generate added messages and services with any dependencies listed here
# generate_messages(
#   DEPENDENCIES
#   std_msgs
# )

################################################
## Declare ROS dynamic reconfigure parameters ##
################################################

## To declare and build dynamic reconfigure parameters within this
## package, follow these steps:
## * In the file package.xml:
##   * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
## * In this file (CMakeLists.txt):
##   * add "dynamic_reconfigure" to
##     find_package(catkin REQUIRED COMPONENTS ...)
##   * uncomment the "generate_dynamic_reconfigure_options" section below
##     and list every .cfg file to be processed

## Generate dynamic reconfigure parameters in the 'cfg' folder
# generate_dynamic_reconfigure_options(
#   cfg/DynReconf1.cfg
#   cfg/DynReconf2.cfg
# )

###################################
## catkin specific configuration ##
###################################
## The catkin_package macro generates cmake config files for your package
## Declare things to be passed to dependent projects
## INCLUDE_DIRS: uncomment this if your package contains header files
## LIBRARIES: libraries you create in this project that dependent projects also need
## CATKIN_DEPENDS: catkin_packages dependent projects also need
## DEPENDS: system dependencies of this project that dependent projects also need
catkin_package(
#  INCLUDE_DIRS include
#  LIBRARIES LKA
  CATKIN_DEPENDS roscpp rospy std_msgs
#  DEPENDS system_lib
)

###########
## Build ##
###########

## Specify additional locations of header files
## Your package locations should be listed before other locations
include_directories(
#  include
  ${catkin_INCLUDE_DIRS}
)

## Declare a C++ library
# add_library(${PROJECT_NAME}
#   src/${PROJECT_NAME}/LKA.cpp
# )

## Add cmake target dependencies of the library
## as an example, code may need to be generated before libraries
## either from message generation or dynamic reconfigure
# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})

## Declare a C++ executable
## With catkin_make all packages are built within a single CMake context
## The recommended prefix ensures that target names across packages don't collide
# add_executable(${PROJECT_NAME}_node src/LKA_node.cpp)

## Rename C++ executable without prefix
## The above recommended prefix causes long target names, the following renames the
## target back to the shorter version for ease of user use
## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")

## Add cmake target dependencies of the executable
## same as for the library above
# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})

## Specify libraries to link a library or executable target against
# target_link_libraries(${PROJECT_NAME}_node
#   ${catkin_LIBRARIES}
# )

#############
## Install ##
#############

# all install targets should use catkin DESTINATION variables
# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html

## Mark executable scripts (Python etc.) for installation
## in contrast to setup.py, you can choose the destination
catkin_install_python(PROGRAMS
  src/LaneDetection/lane_detection_publisher.py
  src/LaneDetection/read_img.py
  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)

## Mark executables for installation
## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
# install(TARGETS ${PROJECT_NAME}_node
#   RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )

## Mark libraries for installation
## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html
# install(TARGETS ${PROJECT_NAME}
#   ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
#   LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
#   RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION}
# )

## Mark cpp header files for installation
# install(DIRECTORY include/${PROJECT_NAME}/
#   DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
#   FILES_MATCHING PATTERN "*.h"
#   PATTERN ".svn" EXCLUDE
# )

## Mark other files for installation (e.g. launch and bag files, etc.)
192 | # install(FILES 193 | # # myfile1 194 | # # myfile2 195 | # DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} 196 | # ) 197 | 198 | ############# 199 | ## Testing ## 200 | ############# 201 | 202 | ## Add gtest based cpp test target and link libraries 203 | # catkin_add_gtest(${PROJECT_NAME}-test test/test_LKA.cpp) 204 | # if(TARGET ${PROJECT_NAME}-test) 205 | # target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) 206 | # endif() 207 | 208 | ## Add folders to be run by python nosetests 209 | # catkin_add_nosetests(test) 210 | -------------------------------------------------------------------------------- /src/LaneDetection/models/LaneNet/lanenet_back_end.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Time : 19-4-24 下午3:54 4 | # @Author : MaybeShewill-CV 5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection 6 | # @File : lanenet_back_end.py 7 | # @IDE: PyCharm 8 | """ 9 | LaneNet backend branch which is mainly used for binary and instance segmentation loss calculation 10 | """ 11 | import tensorflow as tf 12 | 13 | import lanenet_discriminative_loss 14 | import cnn_basenet 15 | 16 | 17 | class LaneNetBackEnd(cnn_basenet.CNNBaseModel): 18 | """ 19 | LaneNet backend branch which is mainly used for binary and instance segmentation loss calculation 20 | """ 21 | def __init__(self, phase, cfg): 22 | """ 23 | init lanenet backend 24 | :param phase: train or test 25 | """ 26 | super(LaneNetBackEnd, self).__init__() 27 | self._cfg = cfg 28 | self._phase = phase 29 | self._is_training = self._is_net_for_training() 30 | 31 | self._class_nums = self._cfg.DATASET.NUM_CLASSES 32 | self._embedding_dims = self._cfg.MODEL.EMBEDDING_FEATS_DIMS 33 | self._binary_loss_type = self._cfg.SOLVER.LOSS_TYPE 34 | 35 | def _is_net_for_training(self): 36 | """ 37 | if the net is used for training or not 38 | :return: 39 | """ 40 | if isinstance(self._phase, tf.Tensor): 41 | phase = self._phase 42 | else: 43 | phase = tf.constant(self._phase, dtype=tf.string) 44 | 45 | return tf.equal(phase, tf.constant('train', dtype=tf.string)) 46 | 47 | @classmethod 48 | def _compute_class_weighted_cross_entropy_loss(cls, onehot_labels, logits, classes_weights): 49 | """ 50 | 51 | :param onehot_labels: 52 | :param logits: 53 | :param classes_weights: 54 | :return: 55 | """ 56 | loss_weights = tf.reduce_sum(tf.multiply(onehot_labels, classes_weights), axis=3) 57 | 58 | loss = tf.losses.softmax_cross_entropy( 59 | onehot_labels=onehot_labels, 60 | logits=logits, 61 | weights=loss_weights 62 | ) 63 | 64 | return loss 65 | 66 | @classmethod 67 | def _multi_category_focal_loss(cls, onehot_labels, logits, classes_weights, gamma=2.0): 68 | """ 69 | 70 | :param onehot_labels: 71 | :param logits: 72 | :param classes_weights: 73 | :param gamma: 74 | :return: 75 | """ 76 | epsilon = 1.e-7 77 | alpha = tf.multiply(onehot_labels, classes_weights) 78 | alpha = tf.cast(alpha, tf.float32) 79 | gamma = float(gamma) 80 | y_true = tf.cast(onehot_labels, tf.float32) 81 | y_pred = tf.nn.softmax(logits, dim=-1) 82 | y_pred = tf.clip_by_value(y_pred, epsilon, 1. 
- epsilon) 83 | y_t = tf.multiply(y_true, y_pred) + tf.multiply(1-y_true, 1-y_pred) 84 | ce = -tf.log(y_t) 85 | weight = tf.pow(tf.subtract(1., y_t), gamma) 86 | fl = tf.multiply(tf.multiply(weight, ce), alpha) 87 | loss = tf.reduce_mean(fl) 88 | 89 | return loss 90 | 91 | def compute_loss(self, binary_seg_logits, binary_label, 92 | instance_seg_logits, instance_label, 93 | name, reuse): 94 | """ 95 | compute lanenet loss 96 | :param binary_seg_logits: 97 | :param binary_label: 98 | :param instance_seg_logits: 99 | :param instance_label: 100 | :param name: 101 | :param reuse: 102 | :return: 103 | """ 104 | with tf.variable_scope(name_or_scope=name, reuse=reuse): 105 | # calculate class weighted binary seg loss 106 | with tf.variable_scope(name_or_scope='binary_seg'): 107 | binary_label_onehot = tf.one_hot( 108 | tf.reshape( 109 | tf.cast(binary_label, tf.int32), 110 | shape=[binary_label.get_shape().as_list()[0], 111 | binary_label.get_shape().as_list()[1], 112 | binary_label.get_shape().as_list()[2]]), 113 | depth=self._class_nums, 114 | axis=-1 115 | ) 116 | 117 | binary_label_plain = tf.reshape( 118 | binary_label, 119 | shape=[binary_label.get_shape().as_list()[0] * 120 | binary_label.get_shape().as_list()[1] * 121 | binary_label.get_shape().as_list()[2] * 122 | binary_label.get_shape().as_list()[3]]) 123 | unique_labels, unique_id, counts = tf.unique_with_counts(binary_label_plain) 124 | counts = tf.cast(counts, tf.float32) 125 | inverse_weights = tf.divide( 126 | 1.0, 127 | tf.log(tf.add(tf.divide(counts, tf.reduce_sum(counts)), tf.constant(1.02))) 128 | ) 129 | if self._binary_loss_type == 'cross_entropy': 130 | binary_segmenatation_loss = self._compute_class_weighted_cross_entropy_loss( 131 | onehot_labels=binary_label_onehot, 132 | logits=binary_seg_logits, 133 | classes_weights=inverse_weights 134 | ) 135 | elif self._binary_loss_type == 'focal': 136 | binary_segmenatation_loss = self._multi_category_focal_loss( 137 | onehot_labels=binary_label_onehot, 138 | logits=binary_seg_logits, 139 | classes_weights=inverse_weights 140 | ) 141 | else: 142 | raise NotImplementedError 143 | 144 | # calculate class weighted instance seg loss 145 | with tf.variable_scope(name_or_scope='instance_seg'): 146 | 147 | pix_bn = self.layerbn( 148 | inputdata=instance_seg_logits, is_training=self._is_training, name='pix_bn') 149 | pix_relu = self.relu(inputdata=pix_bn, name='pix_relu') 150 | pix_embedding = self.conv2d( 151 | inputdata=pix_relu, 152 | out_channel=self._embedding_dims, 153 | kernel_size=1, 154 | use_bias=False, 155 | name='pix_embedding_conv' 156 | ) 157 | pix_image_shape = (pix_embedding.get_shape().as_list()[1], pix_embedding.get_shape().as_list()[2]) 158 | instance_segmentation_loss, l_var, l_dist, l_reg = \ 159 | lanenet_discriminative_loss.discriminative_loss( 160 | pix_embedding, instance_label, self._embedding_dims, 161 | pix_image_shape, 0.5, 3.0, 1.0, 1.0, 0.001 162 | ) 163 | 164 | l2_reg_loss = tf.constant(0.0, tf.float32) 165 | for vv in tf.trainable_variables(): 166 | if 'bn' in vv.name or 'gn' in vv.name: 167 | continue 168 | else: 169 | l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv)) 170 | l2_reg_loss *= 0.001 171 | total_loss = binary_segmenatation_loss + instance_segmentation_loss + l2_reg_loss 172 | 173 | ret = { 174 | 'total_loss': total_loss, 175 | 'binary_seg_logits': binary_seg_logits, 176 | 'instance_seg_logits': pix_embedding, 177 | 'binary_seg_loss': binary_segmenatation_loss, 178 | 'discriminative_loss': instance_segmentation_loss 179 | } 180 | 181 | return 
ret 182 | 183 | def inference(self, binary_seg_logits, instance_seg_logits, name, reuse): 184 | """ 185 | 186 | :param binary_seg_logits: 187 | :param instance_seg_logits: 188 | :param name: 189 | :param reuse: 190 | :return: 191 | """ 192 | with tf.variable_scope(name_or_scope=name, reuse=reuse): 193 | 194 | with tf.variable_scope(name_or_scope='binary_seg'): 195 | binary_seg_score = tf.nn.softmax(logits=binary_seg_logits) 196 | binary_seg_prediction = tf.argmax(binary_seg_score, axis=-1) 197 | 198 | with tf.variable_scope(name_or_scope='instance_seg'): 199 | 200 | pix_bn = self.layerbn( 201 | inputdata=instance_seg_logits, is_training=self._is_training, name='pix_bn') 202 | pix_relu = self.relu(inputdata=pix_bn, name='pix_relu') 203 | instance_seg_prediction = self.conv2d( 204 | inputdata=pix_relu, 205 | out_channel=self._embedding_dims, 206 | kernel_size=1, 207 | use_bias=False, 208 | name='pix_embedding_conv' 209 | ) 210 | 211 | return binary_seg_prediction, instance_seg_prediction 212 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /src/LaneDetection/models/LaneNet/vgg16_based_fcn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Time : 19-4-24 下午6:42 4 | # @Author : MaybeShewill-CV 5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection 6 | # @File : vgg16_based_fcn.py 7 | # @IDE: PyCharm 8 | """ 9 | Implement VGG16 based fcn net for semantic segmentation 10 | """ 11 | import collections 12 | 13 | import tensorflow as tf 14 | 15 | import cnn_basenet 16 | import parse_config_utils 17 | 18 | 19 | class VGG16FCN(cnn_basenet.CNNBaseModel): 20 | """ 21 | VGG 16 based fcn net for semantic segmentation 22 | """ 23 | def __init__(self, phase, cfg): 24 | """ 25 | 26 | """ 27 | super(VGG16FCN, self).__init__() 28 | self._cfg = cfg 29 | self._phase = phase 30 | self._is_training = self._is_net_for_training() 31 | self._net_intermediate_results = collections.OrderedDict() 32 | self._class_nums = self._cfg.DATASET.NUM_CLASSES 33 | 34 | def _is_net_for_training(self): 35 | """ 36 | if the net is used for training or not 37 | :return: 38 | """ 39 | if isinstance(self._phase, tf.Tensor): 40 | phase = self._phase 41 | else: 42 | phase = tf.constant(self._phase, dtype=tf.string) 43 | 44 | return tf.equal(phase, tf.constant('train', dtype=tf.string)) 45 | 46 | def _vgg16_conv_stage(self, input_tensor, k_size, out_dims, name, 47 | stride=1, pad='SAME', need_layer_norm=True): 48 | """ 49 | stack conv and activation in vgg16 50 | :param input_tensor: 51 | :param k_size: 52 | :param out_dims: 53 | :param name: 54 | :param stride: 55 | :param pad: 56 | :param need_layer_norm: 57 | :return: 58 | """ 59 | with tf.variable_scope(name): 60 | conv = self.conv2d( 61 | inputdata=input_tensor, out_channel=out_dims, 62 | kernel_size=k_size, stride=stride, 63 | use_bias=False, padding=pad, name='conv' 64 | ) 65 | 66 | if need_layer_norm: 67 | bn = self.layerbn(inputdata=conv, is_training=self._is_training, name='bn') 68 | 69 | relu = self.relu(inputdata=bn, name='relu') 70 | else: 71 | relu = self.relu(inputdata=conv, name='relu') 72 | 73 | return relu 74 | 75 | def _decode_block(self, input_tensor, previous_feats_tensor, 76 | out_channels_nums, name, kernel_size=4, 77 | stride=2, use_bias=False, 78 | previous_kernel_size=4, need_activate=True): 79 | """ 80 | 81 | :param input_tensor: 82 | :param previous_feats_tensor: 83 | :param out_channels_nums: 84 | :param kernel_size: 85 | :param previous_kernel_size: 86 | :param use_bias: 87 | :param stride: 88 | :param name: 89 | :return: 90 | """ 91 | with tf.variable_scope(name_or_scope=name): 92 | 93 | deconv_weights_stddev = tf.sqrt( 94 | tf.divide(tf.constant(2.0, tf.float32), 95 | tf.multiply(tf.cast(previous_kernel_size * previous_kernel_size, tf.float32), 96 | tf.cast(tf.shape(input_tensor)[3], tf.float32))) 97 | ) 98 | deconv_weights_init = tf.truncated_normal_initializer( 99 | mean=0.0, stddev=deconv_weights_stddev) 100 | 101 | deconv = self.deconv2d( 102 | inputdata=input_tensor, out_channel=out_channels_nums, kernel_size=kernel_size, 103 | stride=stride, use_bias=use_bias, w_init=deconv_weights_init, 104 | name='deconv' 105 | ) 106 | 107 | deconv = self.layerbn(inputdata=deconv, is_training=self._is_training, name='deconv_bn') 108 | 109 | deconv = self.relu(inputdata=deconv, name='deconv_relu') 110 | 111 | fuse_feats = tf.add( 112 | previous_feats_tensor, deconv, name='fuse_feats' 113 | ) 114 | 115 | 
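# the fused tensor adds the upsampled decoder features to the encoder skip features;
# when need_activate is False (as in the final instance-branch decode stage below),
# the raw fused embedding is returned without the extra BN + ReLU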
if need_activate: 116 | 117 | fuse_feats = self.layerbn( 118 | inputdata=fuse_feats, is_training=self._is_training, name='fuse_gn' 119 | ) 120 | 121 | fuse_feats = self.relu(inputdata=fuse_feats, name='fuse_relu') 122 | 123 | return fuse_feats 124 | 125 | def _vgg16_fcn_encode(self, input_tensor, name): 126 | """ 127 | 128 | :param input_tensor: 129 | :param name: 130 | :return: 131 | """ 132 | with tf.variable_scope(name_or_scope=name): 133 | # encode stage 1 134 | conv_1_1 = self._vgg16_conv_stage( 135 | input_tensor=input_tensor, k_size=3, 136 | out_dims=64, name='conv1_1', 137 | need_layer_norm=True 138 | ) 139 | conv_1_2 = self._vgg16_conv_stage( 140 | input_tensor=conv_1_1, k_size=3, 141 | out_dims=64, name='conv1_2', 142 | need_layer_norm=True 143 | ) 144 | self._net_intermediate_results['encode_stage_1_share'] = { 145 | 'data': conv_1_2, 146 | 'shape': conv_1_2.get_shape().as_list() 147 | } 148 | 149 | # encode stage 2 150 | pool1 = self.maxpooling( 151 | inputdata=conv_1_2, kernel_size=2, 152 | stride=2, name='pool1' 153 | ) 154 | conv_2_1 = self._vgg16_conv_stage( 155 | input_tensor=pool1, k_size=3, 156 | out_dims=128, name='conv2_1', 157 | need_layer_norm=True 158 | ) 159 | conv_2_2 = self._vgg16_conv_stage( 160 | input_tensor=conv_2_1, k_size=3, 161 | out_dims=128, name='conv2_2', 162 | need_layer_norm=True 163 | ) 164 | self._net_intermediate_results['encode_stage_2_share'] = { 165 | 'data': conv_2_2, 166 | 'shape': conv_2_2.get_shape().as_list() 167 | } 168 | 169 | # encode stage 3 170 | pool2 = self.maxpooling( 171 | inputdata=conv_2_2, kernel_size=2, 172 | stride=2, name='pool2' 173 | ) 174 | conv_3_1 = self._vgg16_conv_stage( 175 | input_tensor=pool2, k_size=3, 176 | out_dims=256, name='conv3_1', 177 | need_layer_norm=True 178 | ) 179 | conv_3_2 = self._vgg16_conv_stage( 180 | input_tensor=conv_3_1, k_size=3, 181 | out_dims=256, name='conv3_2', 182 | need_layer_norm=True 183 | ) 184 | conv_3_3 = self._vgg16_conv_stage( 185 | input_tensor=conv_3_2, k_size=3, 186 | out_dims=256, name='conv3_3', 187 | need_layer_norm=True 188 | ) 189 | self._net_intermediate_results['encode_stage_3_share'] = { 190 | 'data': conv_3_3, 191 | 'shape': conv_3_3.get_shape().as_list() 192 | } 193 | 194 | # encode stage 4 195 | pool3 = self.maxpooling( 196 | inputdata=conv_3_3, kernel_size=2, 197 | stride=2, name='pool3' 198 | ) 199 | conv_4_1 = self._vgg16_conv_stage( 200 | input_tensor=pool3, k_size=3, 201 | out_dims=512, name='conv4_1', 202 | need_layer_norm=True 203 | ) 204 | conv_4_2 = self._vgg16_conv_stage( 205 | input_tensor=conv_4_1, k_size=3, 206 | out_dims=512, name='conv4_2', 207 | need_layer_norm=True 208 | ) 209 | conv_4_3 = self._vgg16_conv_stage( 210 | input_tensor=conv_4_2, k_size=3, 211 | out_dims=512, name='conv4_3', 212 | need_layer_norm=True 213 | ) 214 | self._net_intermediate_results['encode_stage_4_share'] = { 215 | 'data': conv_4_3, 216 | 'shape': conv_4_3.get_shape().as_list() 217 | } 218 | 219 | # encode stage 5 for binary segmentation 220 | pool4 = self.maxpooling( 221 | inputdata=conv_4_3, kernel_size=2, 222 | stride=2, name='pool4' 223 | ) 224 | conv_5_1_binary = self._vgg16_conv_stage( 225 | input_tensor=pool4, k_size=3, 226 | out_dims=512, name='conv5_1_binary', 227 | need_layer_norm=True 228 | ) 229 | conv_5_2_binary = self._vgg16_conv_stage( 230 | input_tensor=conv_5_1_binary, k_size=3, 231 | out_dims=512, name='conv5_2_binary', 232 | need_layer_norm=True 233 | ) 234 | conv_5_3_binary = self._vgg16_conv_stage( 235 | input_tensor=conv_5_2_binary, k_size=3, 236 | 
out_dims=512, name='conv5_3_binary', 237 | need_layer_norm=True 238 | ) 239 | self._net_intermediate_results['encode_stage_5_binary'] = { 240 | 'data': conv_5_3_binary, 241 | 'shape': conv_5_3_binary.get_shape().as_list() 242 | } 243 | 244 | # encode stage 5 for instance segmentation 245 | conv_5_1_instance = self._vgg16_conv_stage( 246 | input_tensor=pool4, k_size=3, 247 | out_dims=512, name='conv5_1_instance', 248 | need_layer_norm=True 249 | ) 250 | conv_5_2_instance = self._vgg16_conv_stage( 251 | input_tensor=conv_5_1_instance, k_size=3, 252 | out_dims=512, name='conv5_2_instance', 253 | need_layer_norm=True 254 | ) 255 | conv_5_3_instance = self._vgg16_conv_stage( 256 | input_tensor=conv_5_2_instance, k_size=3, 257 | out_dims=512, name='conv5_3_instance', 258 | need_layer_norm=True 259 | ) 260 | self._net_intermediate_results['encode_stage_5_instance'] = { 261 | 'data': conv_5_3_instance, 262 | 'shape': conv_5_3_instance.get_shape().as_list() 263 | } 264 | 265 | return 266 | 267 | def _vgg16_fcn_decode(self, name): 268 | """ 269 | 270 | :return: 271 | """ 272 | with tf.variable_scope(name): 273 | 274 | # decode part for binary segmentation 275 | with tf.variable_scope(name_or_scope='binary_seg_decode'): 276 | 277 | decode_stage_5_binary = self._net_intermediate_results['encode_stage_5_binary']['data'] 278 | 279 | decode_stage_4_fuse = self._decode_block( 280 | input_tensor=decode_stage_5_binary, 281 | previous_feats_tensor=self._net_intermediate_results['encode_stage_4_share']['data'], 282 | name='decode_stage_4_fuse', out_channels_nums=512, previous_kernel_size=3 283 | ) 284 | decode_stage_3_fuse = self._decode_block( 285 | input_tensor=decode_stage_4_fuse, 286 | previous_feats_tensor=self._net_intermediate_results['encode_stage_3_share']['data'], 287 | name='decode_stage_3_fuse', out_channels_nums=256 288 | ) 289 | decode_stage_2_fuse = self._decode_block( 290 | input_tensor=decode_stage_3_fuse, 291 | previous_feats_tensor=self._net_intermediate_results['encode_stage_2_share']['data'], 292 | name='decode_stage_2_fuse', out_channels_nums=128 293 | ) 294 | decode_stage_1_fuse = self._decode_block( 295 | input_tensor=decode_stage_2_fuse, 296 | previous_feats_tensor=self._net_intermediate_results['encode_stage_1_share']['data'], 297 | name='decode_stage_1_fuse', out_channels_nums=64 298 | ) 299 | binary_final_logits_conv_weights_stddev = tf.sqrt( 300 | tf.divide(tf.constant(2.0, tf.float32), 301 | tf.multiply(4.0 * 4.0, 302 | tf.cast(tf.shape(decode_stage_1_fuse)[3], tf.float32))) 303 | ) 304 | binary_final_logits_conv_weights_init = tf.truncated_normal_initializer( 305 | mean=0.0, stddev=binary_final_logits_conv_weights_stddev) 306 | 307 | binary_final_logits = self.conv2d( 308 | inputdata=decode_stage_1_fuse, 309 | out_channel=self._class_nums, 310 | kernel_size=1, use_bias=False, 311 | w_init=binary_final_logits_conv_weights_init, 312 | name='binary_final_logits' 313 | ) 314 | 315 | self._net_intermediate_results['binary_segment_logits'] = { 316 | 'data': binary_final_logits, 317 | 'shape': binary_final_logits.get_shape().as_list() 318 | } 319 | 320 | with tf.variable_scope(name_or_scope='instance_seg_decode'): 321 | 322 | decode_stage_5_instance = self._net_intermediate_results['encode_stage_5_instance']['data'] 323 | 324 | decode_stage_4_fuse = self._decode_block( 325 | input_tensor=decode_stage_5_instance, 326 | previous_feats_tensor=self._net_intermediate_results['encode_stage_4_share']['data'], 327 | name='decode_stage_4_fuse', out_channels_nums=512, previous_kernel_size=3) 328 
| 329 | decode_stage_3_fuse = self._decode_block( 330 | input_tensor=decode_stage_4_fuse, 331 | previous_feats_tensor=self._net_intermediate_results['encode_stage_3_share']['data'], 332 | name='decode_stage_3_fuse', out_channels_nums=256) 333 | 334 | decode_stage_2_fuse = self._decode_block( 335 | input_tensor=decode_stage_3_fuse, 336 | previous_feats_tensor=self._net_intermediate_results['encode_stage_2_share']['data'], 337 | name='decode_stage_2_fuse', out_channels_nums=128) 338 | 339 | decode_stage_1_fuse = self._decode_block( 340 | input_tensor=decode_stage_2_fuse, 341 | previous_feats_tensor=self._net_intermediate_results['encode_stage_1_share']['data'], 342 | name='decode_stage_1_fuse', out_channels_nums=64, need_activate=False) 343 | 344 | self._net_intermediate_results['instance_segment_logits'] = { 345 | 'data': decode_stage_1_fuse, 346 | 'shape': decode_stage_1_fuse.get_shape().as_list() 347 | } 348 | 349 | def build_model(self, input_tensor, name, reuse=False): 350 | """ 351 | 352 | :param input_tensor: 353 | :param name: 354 | :param reuse: 355 | :return: 356 | """ 357 | with tf.variable_scope(name_or_scope=name, reuse=reuse): 358 | # vgg16 fcn encode part 359 | self._vgg16_fcn_encode(input_tensor=input_tensor, name='vgg16_encode_module') 360 | # vgg16 fcn decode part 361 | self._vgg16_fcn_decode(name='vgg16_decode_module') 362 | 363 | return self._net_intermediate_results 364 | 365 | 366 | if __name__ == '__main__': 367 | """ 368 | test code 369 | """ 370 | test_in_tensor = tf.placeholder(dtype=tf.float32, shape=[1, 256, 512, 3], name='input') 371 | model = VGG16FCN(phase='train', cfg=parse_config_utils.lanenet_cfg) 372 | ret = model.build_model(test_in_tensor, name='vgg16fcn') 373 | for layer_name, layer_info in ret.items(): 374 | print('layer name: {:s} shape: {}'.format(layer_name, layer_info['shape'])) 375 | -------------------------------------------------------------------------------- /src/LaneDetection/models/LaneNet/lanenet_postprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Time : 18-5-30 上午10:04 4 | # @Author : MaybeShewill-CV 5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection 6 | # @File : lanenet_postprocess.py 7 | # @IDE: PyCharm Community Edition 8 | """ 9 | LaneNet model post process 10 | """ 11 | import os.path as ops 12 | import math 13 | 14 | import cv2 15 | import glog as log 16 | import numpy as np 17 | from sklearn.cluster import DBSCAN 18 | from sklearn.preprocessing import StandardScaler 19 | 20 | 21 | def _morphological_process(image, kernel_size=5): 22 | """ 23 | morphological process to fill holes in the binary segmentation result 24 | :param image: 25 | :param kernel_size: 26 | :return: 27 | """ 28 | if len(image.shape) == 3: 29 | raise ValueError('Binary segmentation result image should be a single channel image') 30 | 31 | if image.dtype != np.uint8: # '!=' rather than 'is not': dtypes compare by value, not identity 32 | image = np.array(image, np.uint8) 33 | 34 | kernel = cv2.getStructuringElement(shape=cv2.MORPH_ELLIPSE, ksize=(kernel_size, kernel_size)) 35 | 36 | # the closing operation fills holes 37 | closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=1) 38 | 39 | return closing 40 | 41 | 42 | def _connect_components_analysis(image): 43 | """ 44 | connected components analysis to remove small components 45 | :param image: 46 | :return: 47 | """ 48 | if len(image.shape) == 3: 49 | gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 50 | else: 51 | gray_image = image 52 | 53 | return cv2.connectedComponentsWithStats(gray_image, connectivity=8, ltype=cv2.CV_32S) 54 | 
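# A minimal usage sketch of the two helpers above; it mirrors the cleanup that
# LaneNetPostProcessor.postprocess performs below (the 100-pixel area threshold
# matches that method's min_area_threshold default; the toy mask is made up):
#
# >>> mask = np.zeros((256, 512), dtype=np.uint8)
# >>> mask[120:140, 100:400] = 255                        # lane-like blob
# >>> mask[60:62, 10:14] = 255                            # small noise blob
# >>> closed = _morphological_process(mask, kernel_size=5)
# >>> _, labels, stats, _ = _connect_components_analysis(closed)
# >>> for component_id, stat in enumerate(stats):
# ...     if stat[4] <= 100:                              # stats column 4 is the component pixel area
# ...         closed[labels == component_id] = 0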
55 | 56 | class _LaneFeat(object): 57 | """ 58 | 59 | """ 60 | def __init__(self, feat, coord, class_id=-1): 61 | """ 62 | lane feat object 63 | :param feat: lane embedding feats [feature_1, feature_2, ...] 64 | :param coord: lane coordinates [x, y] 65 | :param class_id: lane class id 66 | """ 67 | self._feat = feat 68 | self._coord = coord 69 | self._class_id = class_id 70 | 71 | @property 72 | def feat(self): 73 | """ 74 | 75 | :return: 76 | """ 77 | return self._feat 78 | 79 | @feat.setter 80 | def feat(self, value): 81 | """ 82 | 83 | :param value: 84 | :return: 85 | """ 86 | if not isinstance(value, np.ndarray): 87 | value = np.array(value, dtype=np.float64) 88 | 89 | if value.dtype != np.float64: # keep feats in float64; the original guard checked float32 yet converted to float64 90 | value = np.array(value, dtype=np.float64) 91 | 92 | self._feat = value 93 | 94 | @property 95 | def coord(self): 96 | """ 97 | 98 | :return: 99 | """ 100 | return self._coord 101 | 102 | @coord.setter 103 | def coord(self, value): 104 | """ 105 | 106 | :param value: 107 | :return: 108 | """ 109 | if not isinstance(value, np.ndarray): 110 | value = np.array(value) 111 | 112 | if value.dtype != np.int32: 113 | value = np.array(value, dtype=np.int32) 114 | 115 | self._coord = value 116 | 117 | @property 118 | def class_id(self): 119 | """ 120 | 121 | :return: 122 | """ 123 | return self._class_id 124 | 125 | @class_id.setter 126 | def class_id(self, value): 127 | """ 128 | 129 | :param value: 130 | :return: 131 | """ 132 | if not isinstance(value, (int, np.integer)): # accept plain Python ints as well as numpy integer types 133 | raise ValueError('Class id must be integer') 134 | 135 | self._class_id = value 136 | 137 | 138 | class _LaneNetCluster(object): 139 | """ 140 | Instance segmentation result cluster 141 | """ 142 | 143 | def __init__(self, cfg): 144 | """ 145 | 146 | """ 147 | self._color_map = [np.array([255, 0, 0]), 148 | np.array([0, 255, 0]), 149 | np.array([0, 0, 255]), 150 | np.array([125, 125, 0]), 151 | np.array([0, 125, 125]), 152 | np.array([125, 0, 125]), 153 | np.array([50, 100, 50]), 154 | np.array([100, 50, 100])] 155 | self._cfg = cfg 156 | 157 | def _embedding_feats_dbscan_cluster(self, embedding_image_feats): 158 | """ 159 | dbscan cluster 160 | :param embedding_image_feats: 161 | :return: 162 | """ 163 | db = DBSCAN(eps=self._cfg.POSTPROCESS.DBSCAN_EPS, min_samples=self._cfg.POSTPROCESS.DBSCAN_MIN_SAMPLES) 164 | try: 165 | features = StandardScaler().fit_transform(embedding_image_feats) 166 | db.fit(features) 167 | except Exception as err: 168 | log.error(err) 169 | ret = { 170 | 'origin_features': None, 171 | 'cluster_nums': 0, 172 | 'db_labels': None, 173 | 'unique_labels': None, 174 | 'cluster_center': None 175 | } 176 | return ret 177 | db_labels = db.labels_ 178 | unique_labels = np.unique(db_labels) 179 | 180 | num_clusters = len(unique_labels) 181 | cluster_centers = db.components_ 182 | 183 | ret = { 184 | 'origin_features': features, 185 | 'cluster_nums': num_clusters, 186 | 'db_labels': db_labels, 187 | 'unique_labels': unique_labels, 188 | 'cluster_center': cluster_centers 189 | } 190 | 191 | return ret 192 | 193 | @staticmethod 194 | def _get_lane_embedding_feats(binary_seg_ret, instance_seg_ret): 195 | """ 196 | get lane embedding features according to the binary seg result 197 | :param binary_seg_ret: 198 | :param instance_seg_ret: 199 | :return: 200 | """ 201 | idx = np.where(binary_seg_ret == 255) 202 | lane_embedding_feats = instance_seg_ret[idx] 203 | # idx_scale = np.vstack((idx[0] / 256.0, idx[1] / 
512.0)).transpose() 204 | # lane_embedding_feats = np.hstack((lane_embedding_feats, idx_scale)) 205 | lane_coordinate = np.vstack((idx[1], idx[0])).transpose() 206 | 207 | assert lane_embedding_feats.shape[0] == lane_coordinate.shape[0] 208 | 209 | ret = { 210 | 'lane_embedding_feats': lane_embedding_feats, 211 | 'lane_coordinates': lane_coordinate 212 | } 213 | 214 | return ret 215 | 216 | def apply_lane_feats_cluster(self, binary_seg_result, instance_seg_result): 217 | """ 218 | 219 | :param binary_seg_result: 220 | :param instance_seg_result: 221 | :return: 222 | """ 223 | # get embedding feats and coords 224 | get_lane_embedding_feats_result = self._get_lane_embedding_feats( 225 | binary_seg_ret=binary_seg_result, 226 | instance_seg_ret=instance_seg_result 227 | ) 228 | 229 | # dbscan cluster 230 | dbscan_cluster_result = self._embedding_feats_dbscan_cluster( 231 | embedding_image_feats=get_lane_embedding_feats_result['lane_embedding_feats'] 232 | ) 233 | 234 | mask = np.zeros(shape=[binary_seg_result.shape[0], binary_seg_result.shape[1], 3], dtype=np.uint8) 235 | db_labels = dbscan_cluster_result['db_labels'] 236 | unique_labels = dbscan_cluster_result['unique_labels'] 237 | coord = get_lane_embedding_feats_result['lane_coordinates'] 238 | 239 | if db_labels is None: 240 | return None, None 241 | 242 | lane_coords = [] 243 | 244 | for index, label in enumerate(unique_labels.tolist()): 245 | if label == -1: 246 | continue 247 | idx = np.where(db_labels == label) 248 | pix_coord_idx = tuple((coord[idx][:, 1], coord[idx][:, 0])) 249 | mask[pix_coord_idx] = self._color_map[index] 250 | lane_coords.append(coord[idx]) 251 | 252 | return mask, lane_coords 253 | 254 | 255 | class LaneNetPostProcessor(object): 256 | """ 257 | lanenet post process for lane generation 258 | """ 259 | def __init__(self, cfg, ipm_remap_file_path='./data/tusimple_ipm_remap.yml'): 260 | """ 261 | 262 | :param ipm_remap_file_path: ipm generate file path 263 | """ 264 | assert ops.exists(ipm_remap_file_path), '{:s} not exist'.format(ipm_remap_file_path) 265 | 266 | self._cfg = cfg 267 | self._cluster = _LaneNetCluster(cfg=cfg) 268 | self._ipm_remap_file_path = ipm_remap_file_path 269 | 270 | remap_file_load_ret = self._load_remap_matrix() 271 | self._remap_to_ipm_x = remap_file_load_ret['remap_to_ipm_x'] 272 | self._remap_to_ipm_y = remap_file_load_ret['remap_to_ipm_y'] 273 | 274 | self._color_map = [np.array([255, 0, 0]), 275 | np.array([0, 255, 0]), 276 | np.array([0, 0, 255]), 277 | np.array([125, 125, 0]), 278 | np.array([0, 125, 125]), 279 | np.array([125, 0, 125]), 280 | np.array([50, 100, 50]), 281 | np.array([100, 50, 100])] 282 | 283 | def _load_remap_matrix(self): 284 | """ 285 | 286 | :return: 287 | """ 288 | fs = cv2.FileStorage(self._ipm_remap_file_path, cv2.FILE_STORAGE_READ) 289 | 290 | remap_to_ipm_x = fs.getNode('remap_ipm_x').mat() 291 | remap_to_ipm_y = fs.getNode('remap_ipm_y').mat() 292 | 293 | ret = { 294 | 'remap_to_ipm_x': remap_to_ipm_x, 295 | 'remap_to_ipm_y': remap_to_ipm_y, 296 | } 297 | 298 | fs.release() 299 | 300 | return ret 301 | 302 | def postprocess(self, binary_seg_result, instance_seg_result=None, 303 | min_area_threshold=100, source_image=None, 304 | data_source='tusimple'): 305 | """ 306 | 307 | :param binary_seg_result: 308 | :param instance_seg_result: 309 | :param min_area_threshold: 310 | :param source_image: 311 | :param data_source: 312 | :return: 313 | """ 314 | # convert binary_seg_result 315 | binary_seg_result = np.array(binary_seg_result * 255, dtype=np.uint8) 316 | 
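# The remaining steps form the full post-processing pipeline: close holes in the
# binary mask, drop small connected components, cluster the surviving pixels into
# lane instances with DBSCAN on the embedding features, fit a second-order
# polynomial to each lane in the bird's-eye (IPM) view, then project the fitted
# points back onto the source image.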
317 | # apply morphological operations to fill in holes and remove small areas 318 | morphological_ret = _morphological_process(binary_seg_result, kernel_size=5) 319 | 320 | connect_components_analysis_ret = _connect_components_analysis(image=morphological_ret) 321 | 322 | labels = connect_components_analysis_ret[1] 323 | stats = connect_components_analysis_ret[2] 324 | for index, stat in enumerate(stats): 325 | if stat[4] <= min_area_threshold: 326 | idx = np.where(labels == index) 327 | morphological_ret[idx] = 0 328 | 329 | # apply embedding features cluster 330 | mask_image, lane_coords = self._cluster.apply_lane_feats_cluster( 331 | binary_seg_result=morphological_ret, 332 | instance_seg_result=instance_seg_result 333 | ) 334 | 335 | if mask_image is None: 336 | return { 337 | 'mask_image': None, 338 | 'fit_params': None, 339 | 'source_image': None, 340 | } 341 | 342 | # lane line fit 343 | fit_params = [] 344 | src_lane_pts = [] # lane pts for every single lane 345 | for lane_index, coords in enumerate(lane_coords): 346 | if data_source == 'tusimple': 347 | tmp_mask = np.zeros(shape=(720, 1280), dtype=np.uint8) 348 | tmp_mask[tuple((np.int_(coords[:, 1] * 720 / 256), np.int_(coords[:, 0] * 1280 / 512)))] = 255 349 | else: 350 | raise ValueError('Wrong data source, only tusimple is supported for now') 351 | tmp_ipm_mask = cv2.remap( 352 | tmp_mask, 353 | self._remap_to_ipm_x, 354 | self._remap_to_ipm_y, 355 | interpolation=cv2.INTER_NEAREST 356 | ) 357 | nonzero_y = np.array(tmp_ipm_mask.nonzero()[0]) 358 | nonzero_x = np.array(tmp_ipm_mask.nonzero()[1]) 359 | 360 | fit_param = np.polyfit(nonzero_y, nonzero_x, 2) 361 | fit_params.append(fit_param) 362 | 363 | [ipm_image_height, ipm_image_width] = tmp_ipm_mask.shape 364 | plot_y = np.linspace(10, ipm_image_height, ipm_image_height - 10) 365 | fit_x = fit_param[0] * plot_y ** 2 + fit_param[1] * plot_y + fit_param[2] 366 | # fit_x = fit_param[0] * plot_y ** 3 + fit_param[1] * plot_y ** 2 + fit_param[2] * plot_y + fit_param[3] 367 | 368 | lane_pts = [] 369 | for index in range(0, plot_y.shape[0], 5): 370 | src_x = self._remap_to_ipm_x[ 371 | int(plot_y[index]), int(np.clip(fit_x[index], 0, ipm_image_width - 1))] 372 | if src_x <= 0: 373 | continue 374 | src_y = self._remap_to_ipm_y[ 375 | int(plot_y[index]), int(np.clip(fit_x[index], 0, ipm_image_width - 1))] 376 | src_y = src_y if src_y > 0 else 0 377 | 378 | lane_pts.append([src_x, src_y]) 379 | 380 | src_lane_pts.append(lane_pts) 381 | 382 | # the tusimple test data samples points along the y axis every 10 pixels 383 | source_image_width = source_image.shape[1] 384 | for index, single_lane_pts in enumerate(src_lane_pts): 385 | single_lane_pt_x = np.array(single_lane_pts, dtype=np.float32)[:, 0] 386 | single_lane_pt_y = np.array(single_lane_pts, dtype=np.float32)[:, 1] 387 | if data_source == 'tusimple': 388 | start_plot_y = 240 389 | end_plot_y = 720 390 | else: 391 | raise ValueError('Wrong data source, only tusimple is supported for now') 392 | step = int(math.floor((end_plot_y - start_plot_y) / 10)) 393 | for plot_y in np.linspace(start_plot_y, end_plot_y, step): 394 | diff = single_lane_pt_y - plot_y 395 | fake_diff_bigger_than_zero = diff.copy() 396 | fake_diff_smaller_than_zero = diff.copy() 397 | fake_diff_bigger_than_zero[np.where(diff <= 0)] = float('inf') 398 | fake_diff_smaller_than_zero[np.where(diff > 0)] = float('-inf') 399 | idx_low = np.argmax(fake_diff_smaller_than_zero) 400 | idx_high = np.argmin(fake_diff_bigger_than_zero) 401 | 402 | previous_src_pt_x = single_lane_pt_x[idx_low] 403 | 
previous_src_pt_y = single_lane_pt_y[idx_low] 404 | last_src_pt_x = single_lane_pt_x[idx_high] 405 | last_src_pt_y = single_lane_pt_y[idx_high] 406 | 407 | if previous_src_pt_y < start_plot_y or last_src_pt_y < start_plot_y or \ 408 | fake_diff_smaller_than_zero[idx_low] == float('-inf') or \ 409 | fake_diff_bigger_than_zero[idx_high] == float('inf'): 410 | continue 411 | 412 | interpolation_src_pt_x = (abs(previous_src_pt_y - plot_y) * previous_src_pt_x + 413 | abs(last_src_pt_y - plot_y) * last_src_pt_x) / \ 414 | (abs(previous_src_pt_y - plot_y) + abs(last_src_pt_y - plot_y)) 415 | interpolation_src_pt_y = (abs(previous_src_pt_y - plot_y) * previous_src_pt_y + 416 | abs(last_src_pt_y - plot_y) * last_src_pt_y) / \ 417 | (abs(previous_src_pt_y - plot_y) + abs(last_src_pt_y - plot_y)) 418 | 419 | if interpolation_src_pt_x > source_image_width or interpolation_src_pt_x < 10: 420 | continue 421 | 422 | lane_color = self._color_map[index].tolist() 423 | cv2.circle(source_image, (int(interpolation_src_pt_x), 424 | int(interpolation_src_pt_y)), 5, lane_color, -1) 425 | ret = { 426 | 'mask_image': mask_image, 427 | 'fit_params': fit_params, 428 | 'source_image': source_image, 429 | } 430 | 431 | return ret 432 | -------------------------------------------------------------------------------- /src/LaneDetection/models/LaneNet/cnn_basenet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-9-18 下午3:59 4 | # @Author : MaybeShewill-CV 5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection 6 | # @File : cnn_basenet.py 7 | # @IDE: PyCharm Community Edition 8 | """ 9 | The base convolutional neural network class, implementing some commonly used cnn functions 10 | """ 11 | import tensorflow as tf 12 | import numpy as np 13 | 14 | 15 | class CNNBaseModel(object): 16 | """ 17 | Base model for other specific cnn models 18 | """ 19 | 20 | def __init__(self): 21 | pass 22 | 23 | @staticmethod 24 | def conv2d(inputdata, out_channel, kernel_size, padding='SAME', 25 | stride=1, w_init=None, b_init=None, 26 | split=1, use_bias=True, data_format='NHWC', name=None): 27 | """ 28 | Wraps the tensorflow conv2d function. 29 | :param name: op name 30 | :param inputdata: A 4D tensorflow tensor which must have a known number of channels, but can have other 31 | unknown dimensions. 32 | :param out_channel: number of output channels. 33 | :param kernel_size: int for a square kernel, or a [h, w] list 34 | :param padding: 'VALID' or 'SAME' 35 | :param stride: int for equal strides, or a [h, w] list 36 | :param w_init: initializer for convolution weights 37 | :param b_init: initializer for bias 38 | :param split: split channels into groups as used in AlexNet, mainly to save GPU memory. 39 | :param use_bias: whether to use bias. 40 | :param data_format: default set to NHWC according to tensorflow 41 | :return: tf.Tensor named ``output`` 42 | """ 43 | with tf.variable_scope(name): 44 | in_shape = inputdata.get_shape().as_list() 45 | channel_axis = 3 if data_format == 'NHWC' else 1 46 | in_channel = in_shape[channel_axis] 47 | assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!" 
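# grouped (split) convolution as in AlexNet: both the input and output channel counts must divide evenly into the groups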
48 | assert in_channel % split == 0 49 | assert out_channel % split == 0 50 | 51 | padding = padding.upper() 52 | 53 | if isinstance(kernel_size, list): 54 | filter_shape = [kernel_size[0], kernel_size[1]] + [in_channel // split, out_channel] # '//' keeps the filter shape integral under Python 3 55 | else: 56 | filter_shape = [kernel_size, kernel_size] + [in_channel // split, out_channel] 57 | 58 | if isinstance(stride, list): 59 | strides = [1, stride[0], stride[1], 1] if data_format == 'NHWC' \ 60 | else [1, 1, stride[0], stride[1]] 61 | else: 62 | strides = [1, stride, stride, 1] if data_format == 'NHWC' \ 63 | else [1, 1, stride, stride] 64 | 65 | if w_init is None: 66 | w_init = tf.contrib.layers.variance_scaling_initializer() 67 | if b_init is None: 68 | b_init = tf.constant_initializer() 69 | 70 | w = tf.get_variable('W', filter_shape, initializer=w_init) 71 | b = None 72 | 73 | if use_bias: 74 | b = tf.get_variable('b', [out_channel], initializer=b_init) 75 | 76 | if split == 1: 77 | conv = tf.nn.conv2d(inputdata, w, strides, padding, data_format=data_format) 78 | else: 79 | inputs = tf.split(inputdata, split, channel_axis) 80 | kernels = tf.split(w, split, 3) 81 | outputs = [tf.nn.conv2d(i, k, strides, padding, data_format=data_format) 82 | for i, k in zip(inputs, kernels)] 83 | conv = tf.concat(outputs, channel_axis) 84 | 85 | ret = tf.identity(tf.nn.bias_add(conv, b, data_format=data_format) 86 | if use_bias else conv, name=name) 87 | 88 | return ret 89 | 90 | @staticmethod 91 | def depthwise_conv(input_tensor, kernel_size, name, depth_multiplier=1, 92 | padding='SAME', stride=1): 93 | """ 94 | 95 | :param input_tensor: 96 | :param kernel_size: 97 | :param name: 98 | :param depth_multiplier: 99 | :param padding: 100 | :param stride: 101 | :return: 102 | """ 103 | with tf.variable_scope(name_or_scope=name): 104 | in_shape = input_tensor.get_shape().as_list() 105 | in_channel = in_shape[3] 106 | padding = padding.upper() 107 | 108 | depthwise_filter_shape = [kernel_size, kernel_size] + [in_channel, depth_multiplier] 109 | w_init = tf.contrib.layers.variance_scaling_initializer() 110 | 111 | depthwise_filter = tf.get_variable( 112 | name='depthwise_filter_w', shape=depthwise_filter_shape, 113 | initializer=w_init 114 | ) 115 | 116 | result = tf.nn.depthwise_conv2d( 117 | input=input_tensor, 118 | filter=depthwise_filter, 119 | strides=[1, stride, stride, 1], 120 | padding=padding, 121 | name='depthwise_conv_output' 122 | ) 123 | return result 124 | 125 | @staticmethod 126 | def relu(inputdata, name=None): 127 | """ 128 | 129 | :param name: 130 | :param inputdata: 131 | :return: 132 | """ 133 | return tf.nn.relu(features=inputdata, name=name) 134 | 135 | @staticmethod 136 | def sigmoid(inputdata, name=None): 137 | """ 138 | 139 | :param name: 140 | :param inputdata: 141 | :return: 142 | """ 143 | return tf.nn.sigmoid(x=inputdata, name=name) 144 | 145 | @staticmethod 146 | def maxpooling(inputdata, kernel_size, stride=None, padding='VALID', 147 | data_format='NHWC', name=None): 148 | """ 149 | 150 | :param name: 151 | :param inputdata: 152 | :param kernel_size: 153 | :param stride: 154 | :param padding: 155 | :param data_format: 156 | :return: 157 | """ 158 | padding = padding.upper() 159 | 160 | if stride is None: 161 | stride = kernel_size 162 | 163 | if isinstance(kernel_size, list): 164 | kernel = [1, kernel_size[0], kernel_size[1], 1] if data_format == 'NHWC' else \ 165 | [1, 1, kernel_size[0], kernel_size[1]] 166 | else: 167 | kernel = [1, kernel_size, kernel_size, 1] if data_format == 'NHWC' \ 168 | else [1, 1, kernel_size, 
kernel_size] 169 | 170 | if isinstance(stride, list): 171 | strides = [1, stride[0], stride[1], 1] if data_format == 'NHWC' \ 172 | else [1, 1, stride[0], stride[1]] 173 | else: 174 | strides = [1, stride, stride, 1] if data_format == 'NHWC' \ 175 | else [1, 1, stride, stride] 176 | 177 | return tf.nn.max_pool(value=inputdata, ksize=kernel, strides=strides, padding=padding, 178 | data_format=data_format, name=name) 179 | 180 | @staticmethod 181 | def avgpooling(inputdata, kernel_size, stride=None, padding='VALID', 182 | data_format='NHWC', name=None): 183 | """ 184 | 185 | :param name: 186 | :param inputdata: 187 | :param kernel_size: 188 | :param stride: 189 | :param padding: 190 | :param data_format: 191 | :return: 192 | """ 193 | if stride is None: 194 | stride = kernel_size 195 | 196 | kernel = [1, kernel_size, kernel_size, 1] if data_format == 'NHWC' \ 197 | else [1, 1, kernel_size, kernel_size] 198 | 199 | strides = [1, stride, stride, 1] if data_format == 'NHWC' else [1, 1, stride, stride] 200 | 201 | return tf.nn.avg_pool(value=inputdata, ksize=kernel, strides=strides, padding=padding, 202 | data_format=data_format, name=name) 203 | 204 | @staticmethod 205 | def globalavgpooling(inputdata, data_format='NHWC', name=None): 206 | """ 207 | 208 | :param name: 209 | :param inputdata: 210 | :param data_format: 211 | :return: 212 | """ 213 | assert inputdata.shape.ndims == 4 214 | assert data_format in ['NHWC', 'NCHW'] 215 | 216 | axis = [1, 2] if data_format == 'NHWC' else [2, 3] 217 | 218 | return tf.reduce_mean(input_tensor=inputdata, axis=axis, name=name) 219 | 220 | @staticmethod 221 | def layernorm(inputdata, epsilon=1e-5, use_bias=True, use_scale=True, 222 | data_format='NHWC', name=None): 223 | """ 224 | :param name: 225 | :param inputdata: 226 | :param epsilon: epsilon to avoid divide-by-zero. 227 | :param use_bias: whether to use the extra affine transformation or not. 228 | :param use_scale: whether to use the extra affine transformation or not. 
229 | :param data_format: 230 | :return: 231 | """ 232 | shape = inputdata.get_shape().as_list() 233 | ndims = len(shape) 234 | assert ndims in [2, 4] 235 | 236 | mean, var = tf.nn.moments(inputdata, list(range(1, len(shape))), keep_dims=True) 237 | 238 | if data_format == 'NCHW': 239 | channel = shape[1] 240 | new_shape = [1, channel, 1, 1] 241 | else: 242 | channel = shape[-1] 243 | new_shape = [1, 1, 1, channel] 244 | if ndims == 2: 245 | new_shape = [1, channel] 246 | 247 | if use_bias: 248 | beta = tf.get_variable('beta', [channel], initializer=tf.constant_initializer()) 249 | beta = tf.reshape(beta, new_shape) 250 | else: 251 | beta = tf.zeros([1] * ndims, name='beta') 252 | if use_scale: 253 | gamma = tf.get_variable('gamma', [channel], initializer=tf.constant_initializer(1.0)) 254 | gamma = tf.reshape(gamma, new_shape) 255 | else: 256 | gamma = tf.ones([1] * ndims, name='gamma') 257 | 258 | return tf.nn.batch_normalization(inputdata, mean, var, beta, gamma, epsilon, name=name) 259 | 260 | @staticmethod 261 | def instancenorm(inputdata, epsilon=1e-5, data_format='NHWC', use_affine=True, name=None): 262 | """ 263 | 264 | :param name: 265 | :param inputdata: 266 | :param epsilon: 267 | :param data_format: 268 | :param use_affine: 269 | :return: 270 | """ 271 | shape = inputdata.get_shape().as_list() 272 | if len(shape) != 4: 273 | raise ValueError("Input data of the instancebn layer has to be a 4D tensor") 274 | 275 | if data_format == 'NHWC': 276 | axis = [1, 2] 277 | ch = shape[3] 278 | new_shape = [1, 1, 1, ch] 279 | else: 280 | axis = [2, 3] 281 | ch = shape[1] 282 | new_shape = [1, ch, 1, 1] 283 | if ch is None: 284 | raise ValueError("Input of instancebn requires a known channel!") 285 | 286 | mean, var = tf.nn.moments(inputdata, axis, keep_dims=True) 287 | 288 | if not use_affine: 289 | return tf.divide(inputdata - mean, tf.sqrt(var + epsilon), name='output') 290 | 291 | beta = tf.get_variable('beta', [ch], initializer=tf.constant_initializer()) 292 | beta = tf.reshape(beta, new_shape) 293 | gamma = tf.get_variable('gamma', [ch], initializer=tf.constant_initializer(1.0)) 294 | gamma = tf.reshape(gamma, new_shape) 295 | return tf.nn.batch_normalization(inputdata, mean, var, beta, gamma, epsilon, name=name) 296 | 297 | @staticmethod 298 | def dropout(inputdata, keep_prob, noise_shape=None, name=None): 299 | """ 300 | 301 | :param name: 302 | :param inputdata: 303 | :param keep_prob: 304 | :param noise_shape: 305 | :return: 306 | """ 307 | return tf.nn.dropout(inputdata, keep_prob=keep_prob, noise_shape=noise_shape, name=name) 308 | 309 | @staticmethod 310 | def fullyconnect(inputdata, out_dim, w_init=None, b_init=None, 311 | use_bias=True, name=None): 312 | """ 313 | Fully-Connected layer, takes a N>1D tensor and returns a 2D tensor. 314 | It is an equivalent of `tf.layers.dense` except for naming conventions. 315 | 316 | :param inputdata: a tensor to be flattened except for the first dimension. 317 | :param out_dim: output dimension 318 | :param w_init: initializer for w. Defaults to `variance_scaling_initializer`. 319 | :param b_init: initializer for b. Defaults to zero 320 | :param use_bias: whether to use bias. 321 | :param name: 322 | :return: tf.Tensor: a NC tensor named ``output`` with attribute `variables`. 
323 | """ 324 | shape = inputdata.get_shape().as_list()[1:] 325 | if None not in shape: 326 | inputdata = tf.reshape(inputdata, [-1, int(np.prod(shape))]) 327 | else: 328 | inputdata = tf.reshape(inputdata, tf.stack([tf.shape(inputdata)[0], -1])) 329 | 330 | if w_init is None: 331 | w_init = tf.contrib.layers.variance_scaling_initializer() 332 | if b_init is None: 333 | b_init = tf.constant_initializer() 334 | 335 | ret = tf.layers.dense(inputs=inputdata, activation=lambda x: tf.identity(x, name='output'), 336 | use_bias=use_bias, name=name, 337 | kernel_initializer=w_init, bias_initializer=b_init, 338 | trainable=True, units=out_dim) 339 | return ret 340 | 341 | @staticmethod 342 | def layerbn(inputdata, is_training, name, scale=True): 343 | """ 344 | 345 | :param inputdata: 346 | :param is_training: 347 | :param name: 348 | :param scale: 349 | :return: 350 | """ 351 | 352 | return tf.layers.batch_normalization(inputs=inputdata, training=is_training, name=name, scale=scale) 353 | 354 | @staticmethod 355 | def layergn(inputdata, name, group_size=32, esp=1e-5): 356 | """ 357 | 358 | :param inputdata: 359 | :param name: 360 | :param group_size: 361 | :param esp: 362 | :return: 363 | """ 364 | with tf.variable_scope(name): 365 | inputdata = tf.transpose(inputdata, [0, 3, 1, 2]) 366 | n, c, h, w = inputdata.get_shape().as_list() 367 | group_size = min(group_size, c) 368 | inputdata = tf.reshape(inputdata, [-1, group_size, c // group_size, h, w]) 369 | mean, var = tf.nn.moments(inputdata, [2, 3, 4], keep_dims=True) 370 | inputdata = (inputdata - mean) / tf.sqrt(var + esp) 371 | 372 | # 每个通道的gamma和beta 373 | gamma = tf.Variable(tf.constant(1.0, shape=[c]), dtype=tf.float32, name='gamma') 374 | beta = tf.Variable(tf.constant(0.0, shape=[c]), dtype=tf.float32, name='beta') 375 | gamma = tf.reshape(gamma, [1, c, 1, 1]) 376 | beta = tf.reshape(beta, [1, c, 1, 1]) 377 | 378 | # 根据论文进行转换 [n, c, h, w, c] 到 [n, h, w, c] 379 | output = tf.reshape(inputdata, [-1, c, h, w]) 380 | output = output * gamma + beta 381 | output = tf.transpose(output, [0, 2, 3, 1]) 382 | 383 | return output 384 | 385 | @staticmethod 386 | def squeeze(inputdata, axis=None, name=None): 387 | """ 388 | 389 | :param inputdata: 390 | :param axis: 391 | :param name: 392 | :return: 393 | """ 394 | return tf.squeeze(input=inputdata, axis=axis, name=name) 395 | 396 | @staticmethod 397 | def deconv2d(inputdata, out_channel, kernel_size, padding='SAME', 398 | stride=1, w_init=None, b_init=None, 399 | use_bias=True, activation=None, data_format='channels_last', 400 | trainable=True, name=None): 401 | """ 402 | Packing the tensorflow conv2d function. 403 | :param name: op name 404 | :param inputdata: A 4D tensorflow tensor which ust have known number of channels, but can have other 405 | unknown dimensions. 406 | :param out_channel: number of output channel. 407 | :param kernel_size: int so only support square kernel convolution 408 | :param padding: 'VALID' or 'SAME' 409 | :param stride: int so only support square stride 410 | :param w_init: initializer for convolution weights 411 | :param b_init: initializer for bias 412 | :param activation: whether to apply a activation func to deconv result 413 | :param use_bias: whether to use bias. 
414 | :param data_format: default set to NHWC according to tensorflow 415 | :return: tf.Tensor named ``output`` 416 | """ 417 | with tf.variable_scope(name): 418 | in_shape = inputdata.get_shape().as_list() 419 | channel_axis = 3 if data_format == 'channels_last' else 1 420 | in_channel = in_shape[channel_axis] 421 | assert in_channel is not None, "[Deconv2D] Input cannot have unknown channel!" 422 | 423 | padding = padding.upper() 424 | 425 | if w_init is None: 426 | w_init = tf.contrib.layers.variance_scaling_initializer() 427 | if b_init is None: 428 | b_init = tf.constant_initializer() 429 | 430 | ret = tf.layers.conv2d_transpose(inputs=inputdata, filters=out_channel, 431 | kernel_size=kernel_size, 432 | strides=stride, padding=padding, 433 | data_format=data_format, 434 | activation=activation, use_bias=use_bias, 435 | kernel_initializer=w_init, 436 | bias_initializer=b_init, trainable=trainable, 437 | name=name) 438 | return ret 439 | 440 | @staticmethod 441 | def dilation_conv(input_tensor, k_size, out_dims, rate, padding='SAME', 442 | w_init=None, b_init=None, use_bias=False, name=None): 443 | """ 444 | 445 | :param input_tensor: 446 | :param k_size: 447 | :param out_dims: 448 | :param rate: 449 | :param padding: 450 | :param w_init: 451 | :param b_init: 452 | :param use_bias: 453 | :param name: 454 | :return: 455 | """ 456 | with tf.variable_scope(name): 457 | in_shape = input_tensor.get_shape().as_list() 458 | in_channel = in_shape[3] 459 | assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!" 460 | 461 | padding = padding.upper() 462 | 463 | if isinstance(k_size, list): 464 | filter_shape = [k_size[0], k_size[1]] + [in_channel, out_dims] 465 | else: 466 | filter_shape = [k_size, k_size] + [in_channel, out_dims] 467 | 468 | if w_init is None: 469 | w_init = tf.contrib.layers.variance_scaling_initializer() 470 | if b_init is None: 471 | b_init = tf.constant_initializer() 472 | 473 | w = tf.get_variable('W', filter_shape, initializer=w_init) 474 | b = None 475 | 476 | if use_bias: 477 | b = tf.get_variable('b', [out_dims], initializer=b_init) 478 | 479 | conv = tf.nn.atrous_conv2d(value=input_tensor, filters=w, rate=rate, 480 | padding=padding, name='dilation_conv') 481 | 482 | if use_bias: 483 | ret = tf.add(conv, b) 484 | else: 485 | ret = conv 486 | 487 | return ret 488 | 489 | @staticmethod 490 | def spatial_dropout(input_tensor, keep_prob, is_training, name, seed=1234): 491 | """ 492 | Spatial dropout implementation: drops whole feature maps (channels) rather than individual activations 493 | :param input_tensor: 494 | :param keep_prob: 495 | :param is_training: 496 | :param name: 497 | :param seed: 498 | :return: 499 | """ 500 | 501 | def f1(): 502 | input_shape = input_tensor.get_shape().as_list() 503 | noise_shape = tf.constant(value=[input_shape[0], 1, 1, input_shape[3]]) 504 | return tf.nn.dropout(input_tensor, keep_prob, noise_shape, seed=seed, name="spatial_dropout") 505 | 506 | def f2(): 507 | return input_tensor 508 | 509 | with tf.variable_scope(name_or_scope=name): 510 | 511 | output = tf.cond(is_training, f1, f2) 512 | 513 | return output 514 | 515 | @staticmethod 516 | def lrelu(inputdata, name, alpha=0.2): 517 | """ 518 | 519 | :param inputdata: 520 | :param alpha: 521 | :param name: 522 | :return: 523 | """ 524 | with tf.variable_scope(name): 525 | return tf.nn.relu(inputdata) - alpha * tf.nn.relu(-inputdata) 526 | -------------------------------------------------------------------------------- /src/LaneDetection/models/LaneNet/bisenet_v2.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2020/4/9 上午11:05 4 | # @Author : MaybeShewill-CV 5 | # @Site : https://github.com/MaybeShewill-CV/bisenetv2-tensorflow 6 | # @File : bisenet_v2.py 7 | # @IDE: PyCharm 8 | """ 9 | BiseNet V2 Model 10 | """ 11 | import collections 12 | 13 | import tensorflow as tf 14 | 15 | import cnn_basenet 16 | import parse_config_utils 17 | 18 | 19 | class _StemBlock(cnn_basenet.CNNBaseModel): 20 | """ 21 | implementation of stem block module 22 | """ 23 | def __init__(self, phase): 24 | """ 25 | 26 | :param phase: 27 | """ 28 | super(_StemBlock, self).__init__() 29 | self._phase = phase 30 | self._is_training = self._is_net_for_training() 31 | self._padding = 'SAME' 32 | 33 | def _is_net_for_training(self): 34 | """ 35 | if the net is used for training or not 36 | :return: 37 | """ 38 | if isinstance(self._phase, tf.Tensor): 39 | phase = self._phase 40 | else: 41 | phase = tf.constant(self._phase, dtype=tf.string) 42 | return tf.equal(phase, tf.constant('train', dtype=tf.string)) 43 | 44 | def _conv_block(self, input_tensor, k_size, output_channels, stride, 45 | name, padding='SAME', use_bias=False, need_activate=False): 46 | """ 47 | conv block in attention refine 48 | :param input_tensor: 49 | :param k_size: 50 | :param output_channels: 51 | :param stride: 52 | :param name: 53 | :param padding: 54 | :param use_bias: 55 | :return: 56 | """ 57 | with tf.variable_scope(name_or_scope=name): 58 | result = self.conv2d( 59 | inputdata=input_tensor, 60 | out_channel=output_channels, 61 | kernel_size=k_size, 62 | padding=padding, 63 | stride=stride, 64 | use_bias=use_bias, 65 | name='conv' 66 | ) 67 | if need_activate: 68 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True) 69 | result = self.relu(inputdata=result, name='relu') 70 | else: 71 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True) 72 | return result 73 | 74 | def __call__(self, *args, **kwargs): 75 | """ 76 | 77 | :param args: 78 | :param kwargs: 79 | :return: 80 | """ 81 | input_tensor = kwargs['input_tensor'] 82 | name_scope = kwargs['name'] 83 | output_channels = kwargs['output_channels'] 84 | if 'padding' in kwargs: 85 | self._padding = kwargs['padding'] 86 | with tf.variable_scope(name_or_scope=name_scope): 87 | input_tensor = self._conv_block( 88 | input_tensor=input_tensor, 89 | k_size=3, 90 | output_channels=output_channels, 91 | stride=2, 92 | name='conv_block_1', 93 | padding=self._padding, 94 | use_bias=False, 95 | need_activate=True 96 | ) 97 | with tf.variable_scope(name_or_scope='downsample_branch_left'): 98 | branch_left_output = self._conv_block( 99 | input_tensor=input_tensor, 100 | k_size=1, 101 | output_channels=int(output_channels / 2), 102 | stride=1, 103 | name='1x1_conv_block', 104 | padding=self._padding, 105 | use_bias=False, 106 | need_activate=True 107 | ) 108 | branch_left_output = self._conv_block( 109 | input_tensor=branch_left_output, 110 | k_size=3, 111 | output_channels=output_channels, 112 | stride=2, 113 | name='3x3_conv_block', 114 | padding=self._padding, 115 | use_bias=False, 116 | need_activate=True 117 | ) 118 | with tf.variable_scope(name_or_scope='downsample_branch_right'): 119 | branch_right_output = self.maxpooling( 120 | inputdata=input_tensor, 121 | kernel_size=3, 122 | stride=2, 123 | padding=self._padding, 124 | name='maxpooling_block' 125 | ) 126 | result = tf.concat([branch_left_output, branch_right_output], axis=-1, name='concate_features') 127 | 
result = self._conv_block( 128 | input_tensor=result, 129 | k_size=3, 130 | output_channels=output_channels, 131 | stride=1, 132 | name='final_conv_block', 133 | padding=self._padding, 134 | use_bias=False, 135 | need_activate=True 136 | ) 137 | return result 138 | 139 | 140 | class _ContextEmbedding(cnn_basenet.CNNBaseModel): 141 | """ 142 | implementation of context embedding module in bisenetv2 143 | """ 144 | def __init__(self, phase): 145 | """ 146 | 147 | :param phase: 148 | """ 149 | super(_ContextEmbedding, self).__init__() 150 | self._phase = phase 151 | self._is_training = self._is_net_for_training() 152 | self._padding = 'SAME' 153 | 154 | def _is_net_for_training(self): 155 | """ 156 | if the net is used for training or not 157 | :return: 158 | """ 159 | if isinstance(self._phase, tf.Tensor): 160 | phase = self._phase 161 | else: 162 | phase = tf.constant(self._phase, dtype=tf.string) 163 | return tf.equal(phase, tf.constant('train', dtype=tf.string)) 164 | 165 | def _conv_block(self, input_tensor, k_size, output_channels, stride, 166 | name, padding='SAME', use_bias=False, need_activate=False): 167 | """ 168 | conv block in attention refine 169 | :param input_tensor: 170 | :param k_size: 171 | :param output_channels: 172 | :param stride: 173 | :param name: 174 | :param padding: 175 | :param use_bias: 176 | :return: 177 | """ 178 | with tf.variable_scope(name_or_scope=name): 179 | result = self.conv2d( 180 | inputdata=input_tensor, 181 | out_channel=output_channels, 182 | kernel_size=k_size, 183 | padding=padding, 184 | stride=stride, 185 | use_bias=use_bias, 186 | name='conv' 187 | ) 188 | if need_activate: 189 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True) 190 | result = self.relu(inputdata=result, name='relu') 191 | else: 192 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True) 193 | return result 194 | 195 | def __call__(self, *args, **kwargs): 196 | """ 197 | 198 | :param args: 199 | :param kwargs: 200 | :return: 201 | """ 202 | input_tensor = kwargs['input_tensor'] 203 | name_scope = kwargs['name'] 204 | output_channels = input_tensor.get_shape().as_list()[-1] 205 | if 'padding' in kwargs: 206 | self._padding = kwargs['padding'] 207 | with tf.variable_scope(name_or_scope=name_scope): 208 | result = tf.reduce_mean(input_tensor, axis=[1, 2], keepdims=True, name='global_avg_pooling') 209 | result = self.layerbn(result, self._is_training, 'bn') 210 | result = self._conv_block( 211 | input_tensor=result, 212 | k_size=1, 213 | output_channels=output_channels, 214 | stride=1, 215 | name='conv_block_1', 216 | padding=self._padding, 217 | use_bias=False, 218 | need_activate=True 219 | ) 220 | result = tf.add(result, input_tensor, name='fused_features') 221 | result = self.conv2d( 222 | inputdata=result, 223 | out_channel=output_channels, 224 | kernel_size=3, 225 | padding=self._padding, 226 | stride=1, 227 | use_bias=False, 228 | name='final_conv_block' 229 | ) 230 | return result 231 | 232 | 233 | class _GatherExpansion(cnn_basenet.CNNBaseModel): 234 | """ 235 | implementation of gather and expansion module in bisenetv2 236 | """ 237 | def __init__(self, phase): 238 | """ 239 | 240 | :param phase: 241 | """ 242 | super(_GatherExpansion, self).__init__() 243 | self._phase = phase 244 | self._is_training = self._is_net_for_training() 245 | self._padding = 'SAME' 246 | self._stride = 1 247 | self._expansion_factor = 6 248 | 249 | def _is_net_for_training(self): 250 | """ 251 | if the net is 
used for training or not 252 | :return: 253 | """ 254 | if isinstance(self._phase, tf.Tensor): 255 | phase = self._phase 256 | else: 257 | phase = tf.constant(self._phase, dtype=tf.string) 258 | return tf.equal(phase, tf.constant('train', dtype=tf.string)) 259 | 260 | def _conv_block(self, input_tensor, k_size, output_channels, stride, 261 | name, padding='SAME', use_bias=False, need_activate=False): 262 | """ 263 | conv block in attention refine 264 | :param input_tensor: 265 | :param k_size: 266 | :param output_channels: 267 | :param stride: 268 | :param name: 269 | :param padding: 270 | :param use_bias: 271 | :return: 272 | """ 273 | with tf.variable_scope(name_or_scope=name): 274 | result = self.conv2d( 275 | inputdata=input_tensor, 276 | out_channel=output_channels, 277 | kernel_size=k_size, 278 | padding=padding, 279 | stride=stride, 280 | use_bias=use_bias, 281 | name='conv' 282 | ) 283 | if need_activate: 284 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True) 285 | result = self.relu(inputdata=result, name='relu') 286 | else: 287 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True) 288 | return result 289 | 290 | def _apply_ge_when_stride_equal_one(self, input_tensor, e, name): 291 | """ 292 | 293 | :param input_tensor: 294 | :param e: 295 | :param name 296 | :return: 297 | """ 298 | input_tensor_channels = input_tensor.get_shape().as_list()[-1] 299 | with tf.variable_scope(name_or_scope=name): 300 | result = self._conv_block( 301 | input_tensor=input_tensor, 302 | k_size=3, 303 | output_channels=input_tensor_channels, 304 | stride=1, 305 | name='3x3_conv_block', 306 | padding=self._padding, 307 | use_bias=False, 308 | need_activate=True 309 | ) 310 | result = self.depthwise_conv( 311 | input_tensor=result, 312 | kernel_size=3, 313 | depth_multiplier=e, 314 | padding=self._padding, 315 | stride=1, 316 | name='depthwise_conv_block' 317 | ) 318 | result = self.layerbn(result, self._is_training, name='dw_bn') 319 | result = self._conv_block( 320 | input_tensor=result, 321 | k_size=1, 322 | output_channels=input_tensor_channels, 323 | stride=1, 324 | name='1x1_conv_block', 325 | padding=self._padding, 326 | use_bias=False, 327 | need_activate=False 328 | ) 329 | result = tf.add(input_tensor, result, name='fused_features') 330 | result = self.relu(result, name='ge_output') 331 | return result 332 | 333 | def _apply_ge_when_stride_equal_two(self, input_tensor, output_channels, e, name): 334 | """ 335 | 336 | :param input_tensor: 337 | :param output_channels: 338 | :param e: 339 | :param name 340 | :return: 341 | """ 342 | input_tensor_channels = input_tensor.get_shape().as_list()[-1] 343 | with tf.variable_scope(name_or_scope=name): 344 | input_proj = self.depthwise_conv( 345 | input_tensor=input_tensor, 346 | kernel_size=3, 347 | name='input_project_dw_conv_block', 348 | depth_multiplier=1, 349 | padding=self._padding, 350 | stride=self._stride 351 | ) 352 | input_proj = self.layerbn(input_proj, self._is_training, name='input_project_bn') 353 | input_proj = self._conv_block( 354 | input_tensor=input_proj, 355 | k_size=1, 356 | output_channels=output_channels, 357 | stride=1, 358 | name='input_project_1x1_conv_block', 359 | padding=self._padding, 360 | use_bias=False, 361 | need_activate=False 362 | ) 363 | 364 | result = self._conv_block( 365 | input_tensor=input_tensor, 366 | k_size=3, 367 | output_channels=input_tensor_channels, 368 | stride=1, 369 | name='3x3_conv_block', 370 | 
padding=self._padding, 371 | use_bias=False, 372 | need_activate=True 373 | ) 374 | result = self.depthwise_conv( 375 | input_tensor=result, 376 | kernel_size=3, 377 | depth_multiplier=e, 378 | padding=self._padding, 379 | stride=2, 380 | name='depthwise_conv_block_1' 381 | ) 382 | result = self.layerbn(result, self._is_training, name='dw_bn_1') 383 | result = self.depthwise_conv( 384 | input_tensor=result, 385 | kernel_size=3, 386 | depth_multiplier=1, 387 | padding=self._padding, 388 | stride=1, 389 | name='depthwise_conv_block_2' 390 | ) 391 | result = self.layerbn(result, self._is_training, name='dw_bn_2') 392 | result = self._conv_block( 393 | input_tensor=result, 394 | k_size=1, 395 | output_channels=output_channels, 396 | stride=1, 397 | name='1x1_conv_block', 398 | padding=self._padding, 399 | use_bias=False, 400 | need_activate=False 401 | ) 402 | result = tf.add(input_proj, result, name='fused_features') 403 | result = self.relu(result, name='ge_output') 404 | return result 405 | 406 | def __call__(self, *args, **kwargs): 407 | """ 408 | 409 | :param args: 410 | :param kwargs: 411 | :return: 412 | """ 413 | input_tensor = kwargs['input_tensor'] 414 | name_scope = kwargs['name'] 415 | output_channels = input_tensor.get_shape().as_list()[-1] 416 | if 'output_channels' in kwargs: 417 | output_channels = kwargs['output_channels'] 418 | if 'padding' in kwargs: 419 | self._padding = kwargs['padding'] 420 | if 'stride' in kwargs: 421 | self._stride = kwargs['stride'] 422 | if 'e' in kwargs: 423 | self._expansion_factor = kwargs['e'] 424 | 425 | with tf.variable_scope(name_or_scope=name_scope): 426 | if self._stride == 1: 427 | result = self._apply_ge_when_stride_equal_one( 428 | input_tensor=input_tensor, 429 | e=self._expansion_factor, 430 | name='stride_equal_one_module' 431 | ) 432 | elif self._stride == 2: 433 | result = self._apply_ge_when_stride_equal_two( 434 | input_tensor=input_tensor, 435 | output_channels=output_channels, 436 | e=self._expansion_factor, 437 | name='stride_equal_two_module' 438 | ) 439 | else: 440 | raise NotImplementedError('No function matched with stride of {}'.format(self._stride)) 441 | return result 442 | 443 | 444 | class _GuidedAggregation(cnn_basenet.CNNBaseModel): 445 | """ 446 | implementation of guided aggregation module in bisenetv2 447 | """ 448 | 449 | def __init__(self, phase): 450 | """ 451 | 452 | :param phase: 453 | """ 454 | super(_GuidedAggregation, self).__init__() 455 | self._phase = phase 456 | self._is_training = self._is_net_for_training() 457 | self._padding = 'SAME' 458 | 459 | def _is_net_for_training(self): 460 | """ 461 | if the net is used for training or not 462 | :return: 463 | """ 464 | if isinstance(self._phase, tf.Tensor): 465 | phase = self._phase 466 | else: 467 | phase = tf.constant(self._phase, dtype=tf.string) 468 | return tf.equal(phase, tf.constant('train', dtype=tf.string)) 469 | 470 | def _conv_block(self, input_tensor, k_size, output_channels, stride, 471 | name, padding='SAME', use_bias=False, need_activate=False): 472 | """ 473 | conv block in attention refine 474 | :param input_tensor: 475 | :param k_size: 476 | :param output_channels: 477 | :param stride: 478 | :param name: 479 | :param padding: 480 | :param use_bias: 481 | :return: 482 | """ 483 | with tf.variable_scope(name_or_scope=name): 484 | result = self.conv2d( 485 | inputdata=input_tensor, 486 | out_channel=output_channels, 487 | kernel_size=k_size, 488 | padding=padding, 489 | stride=stride, 490 | use_bias=use_bias, 491 | name='conv' 492 | ) 493 | if 
class _GuidedAggregation(cnn_basenet.CNNBaseModel):
    """
    implementation of guided aggregation module in bisenetv2
    """

    def __init__(self, phase):
        """

        :param phase:
        """
        super(_GuidedAggregation, self).__init__()
        self._phase = phase
        self._is_training = self._is_net_for_training()
        self._padding = 'SAME'

    def _is_net_for_training(self):
        """
        check whether the net is built for training or not
        :return:
        """
        if isinstance(self._phase, tf.Tensor):
            phase = self._phase
        else:
            phase = tf.constant(self._phase, dtype=tf.string)
        return tf.equal(phase, tf.constant('train', dtype=tf.string))

    def _conv_block(self, input_tensor, k_size, output_channels, stride,
                    name, padding='SAME', use_bias=False, need_activate=False):
        """
        conv block used in the guided aggregation module
        :param input_tensor:
        :param k_size:
        :param output_channels:
        :param stride:
        :param name:
        :param padding:
        :param use_bias:
        :param need_activate:
        :return:
        """
        with tf.variable_scope(name_or_scope=name):
            result = self.conv2d(
                inputdata=input_tensor,
                out_channel=output_channels,
                kernel_size=k_size,
                padding=padding,
                stride=stride,
                use_bias=use_bias,
                name='conv'
            )
            if need_activate:
                result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
                result = self.relu(inputdata=result, name='relu')
            else:
                result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
        return result

    def __call__(self, *args, **kwargs):
        """

        :param args:
        :param kwargs:
        :return:
        """
        detail_input_tensor = kwargs['detail_input_tensor']
        semantic_input_tensor = kwargs['semantic_input_tensor']
        name_scope = kwargs['name']
        output_channels = detail_input_tensor.get_shape().as_list()[-1]
        if 'padding' in kwargs:
            self._padding = kwargs['padding']

        with tf.variable_scope(name_or_scope=name_scope):
            with tf.variable_scope(name_or_scope='detail_branch'):
                detail_branch_remain = self.depthwise_conv(
                    input_tensor=detail_input_tensor,
                    kernel_size=3,
                    name='3x3_dw_conv_block',
                    depth_multiplier=1,
                    padding=self._padding,
                    stride=1
                )
                detail_branch_remain = self.layerbn(detail_branch_remain, self._is_training, name='bn_1')
                detail_branch_remain = self.conv2d(
                    inputdata=detail_branch_remain,
                    out_channel=output_channels,
                    kernel_size=1,
                    padding=self._padding,
                    stride=1,
                    use_bias=False,
                    name='1x1_conv_block'
                )

                detail_branch_downsample = self._conv_block(
                    input_tensor=detail_input_tensor,
                    k_size=3,
                    output_channels=output_channels,
                    stride=2,
                    name='3x3_conv_block',
                    padding=self._padding,
                    use_bias=False,
                    need_activate=False
                )
                detail_branch_downsample = self.avgpooling(
                    inputdata=detail_branch_downsample,
                    kernel_size=3,
                    stride=2,
                    padding=self._padding,
                    name='avg_pooling_block'
                )

            with tf.variable_scope(name_or_scope='semantic_branch'):
                semantic_branch_remain = self.depthwise_conv(
                    input_tensor=semantic_input_tensor,
                    kernel_size=3,
                    name='3x3_dw_conv_block',
                    depth_multiplier=1,
                    padding=self._padding,
                    stride=1
                )
                semantic_branch_remain = self.layerbn(semantic_branch_remain, self._is_training, name='bn_1')
                semantic_branch_remain = self.conv2d(
                    inputdata=semantic_branch_remain,
                    out_channel=output_channels,
                    kernel_size=1,
                    padding=self._padding,
                    stride=1,
                    use_bias=False,
                    name='1x1_conv_block'
                )
                semantic_branch_remain = self.sigmoid(semantic_branch_remain, name='semantic_remain_sigmoid')

                semantic_branch_upsample = self._conv_block(
                    input_tensor=semantic_input_tensor,
                    k_size=3,
                    output_channels=output_channels,
                    stride=1,
                    name='3x3_conv_block',
                    padding=self._padding,
                    use_bias=False,
                    need_activate=False
                )
                semantic_branch_upsample = tf.image.resize_bilinear(
                    semantic_branch_upsample,
                    detail_input_tensor.shape[1:3],
                    name='semantic_upsample_features'
                )
                semantic_branch_upsample = self.sigmoid(semantic_branch_upsample, name='semantic_upsample_sigmoid')

            with tf.variable_scope(name_or_scope='aggregation_features'):
                guided_features_remain = tf.multiply(
                    detail_branch_remain,
                    semantic_branch_upsample,
                    name='guided_detail_features'
                )
                guided_features_downsample = tf.multiply(
                    detail_branch_downsample,
                    semantic_branch_remain,
                    name='guided_semantic_features'
                )
                guided_features_upsample = tf.image.resize_bilinear(
                    guided_features_downsample,
                    detail_input_tensor.shape[1:3],
                    name='guided_upsample_features'
                )
                guided_features = tf.add(guided_features_remain, guided_features_upsample, name='fused_features')
                guided_features = self._conv_block(
                    input_tensor=guided_features,
                    k_size=3,
                    output_channels=output_channels,
                    stride=1,
                    name='aggregation_feature_output',
                    padding=self._padding,
                    use_bias=False,
                    need_activate=True
                )
        return guided_features

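# The segmentation head projects branch features to per-class logits: a 3x3
# conv block expands to 'feature_dims' channels, a 1x1 conv maps down to
# 'classes_nums' channels, and bilinear resizing upsamples the result by
# 'upsample_ratio'. During training it can also serve as an auxiliary
# (booster) head on the semantic-branch stages; see build_semantic_branch.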
class _SegmentationHead(cnn_basenet.CNNBaseModel):
    """
    implementation of segmentation head in bisenet v2
    """
    def __init__(self, phase):
        """

        :param phase:
        """
        super(_SegmentationHead, self).__init__()
        self._phase = phase
        self._is_training = self._is_net_for_training()
        self._padding = 'SAME'

    def _is_net_for_training(self):
        """
        check whether the net is built for training or not
        :return:
        """
        if isinstance(self._phase, tf.Tensor):
            phase = self._phase
        else:
            phase = tf.constant(self._phase, dtype=tf.string)
        return tf.equal(phase, tf.constant('train', dtype=tf.string))

    def _conv_block(self, input_tensor, k_size, output_channels, stride,
                    name, padding='SAME', use_bias=False, need_activate=False):
        """
        conv block used in the segmentation head
        :param input_tensor:
        :param k_size:
        :param output_channels:
        :param stride:
        :param name:
        :param padding:
        :param use_bias:
        :param need_activate:
        :return:
        """
        with tf.variable_scope(name_or_scope=name):
            result = self.conv2d(
                inputdata=input_tensor,
                out_channel=output_channels,
                kernel_size=k_size,
                padding=padding,
                stride=stride,
                use_bias=use_bias,
                name='conv'
            )
            if need_activate:
                result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
                result = self.relu(inputdata=result, name='relu')
            else:
                result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
        return result

    def __call__(self, *args, **kwargs):
        """

        :param args:
        :param kwargs:
        :return:
        """
        input_tensor = kwargs['input_tensor']
        name_scope = kwargs['name']
        ratio = kwargs['upsample_ratio']
        input_tensor_size = input_tensor.get_shape().as_list()[1:3]
        output_tensor_size = [int(tmp * ratio) for tmp in input_tensor_size]
        feature_dims = kwargs['feature_dims']
        classes_nums = kwargs['classes_nums']
        if 'padding' in kwargs:
            self._padding = kwargs['padding']

        with tf.variable_scope(name_or_scope=name_scope):
            result = self._conv_block(
                input_tensor=input_tensor,
                k_size=3,
                output_channels=feature_dims,
                stride=1,
                name='3x3_conv_block',
                padding=self._padding,
                use_bias=False,
                need_activate=True
            )
            result = self.conv2d(
                inputdata=result,
                out_channel=classes_nums,
                kernel_size=1,
                padding=self._padding,
                stride=1,
                use_bias=False,
                name='1x1_conv_block'
            )
            result = tf.image.resize_bilinear(
                result,
                output_tensor_size,
                name='segmentation_head_logits'
            )
        return result

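# BiseNetV2 assembles the full two-branch network: a shallow, wide detail
# branch for spatial information and a deep, thin semantic branch (stem,
# gather-expansion and context-embedding blocks) for context, fused by the
# guided aggregation layer above and finished with parallel binary and
# instance segmentation heads for LaneNet.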
class BiseNetV2(cnn_basenet.CNNBaseModel):
    """
    implementation of bisenet v2
    """
    def __init__(self, phase, cfg):
        """

        :param phase:
        :param cfg:
        """
        super(BiseNetV2, self).__init__()
        self._cfg = cfg
        self._phase = phase
        self._is_training = self._is_net_for_training()

        # set model hyper params
        self._class_nums = self._cfg.DATASET.NUM_CLASSES
        self._weights_decay = self._cfg.SOLVER.WEIGHT_DECAY
        self._loss_type = self._cfg.SOLVER.LOSS_TYPE
        self._enable_ohem = self._cfg.SOLVER.OHEM.ENABLE
        if self._enable_ohem:
            self._ohem_score_thresh = self._cfg.SOLVER.OHEM.SCORE_THRESH
            self._ohem_min_sample_nums = self._cfg.SOLVER.OHEM.MIN_SAMPLE_NUMS
        self._ge_expand_ratio = self._cfg.MODEL.BISENETV2.GE_EXPAND_RATIO
        self._semantic_channel_ratio = self._cfg.MODEL.BISENETV2.SEMANTIC_CHANNEL_LAMBDA
        self._seg_head_ratio = self._cfg.MODEL.BISENETV2.SEGHEAD_CHANNEL_EXPAND_RATIO

        # set module used in bisenetv2
        self._se_block = _StemBlock(phase=phase)
        self._context_embedding_block = _ContextEmbedding(phase=phase)
        self._ge_block = _GatherExpansion(phase=phase)
        self._guided_aggregation_block = _GuidedAggregation(phase=phase)
        self._seg_head_block = _SegmentationHead(phase=phase)

        # set detail branch channels
        self._detail_branch_channels = self._build_detail_branch_hyper_params()
        # set semantic branch channels
        self._semantic_branch_channels = self._build_semantic_branch_hyper_params()

        # set op block params
        self._block_maps = {
            'conv_block': self._conv_block,
            'se': self._se_block,
            'ge': self._ge_block,
            'ce': self._context_embedding_block,
        }

        self._net_intermediate_results = collections.OrderedDict()

    def _is_net_for_training(self):
        """
        check whether the net is built for training or not
        :return:
        """
        if isinstance(self._phase, tf.Tensor):
            phase = self._phase
        else:
            phase = tf.constant(self._phase, dtype=tf.string)
        return tf.equal(phase, tf.constant('train', dtype=tf.string))

    @classmethod
    def _build_detail_branch_hyper_params(cls):
        """

        :return:
        """
        params = [
            ('stage_1', [('conv_block', 3, 64, 2, 1), ('conv_block', 3, 64, 1, 1)]),
            ('stage_2', [('conv_block', 3, 64, 2, 1), ('conv_block', 3, 64, 1, 2)]),
            ('stage_3', [('conv_block', 3, 128, 2, 1), ('conv_block', 3, 128, 1, 2)]),
        ]
        return collections.OrderedDict(params)

    def _build_semantic_branch_hyper_params(self):
        """

        :return:
        """
        stage_1_channels = int(self._detail_branch_channels['stage_1'][0][2] * self._semantic_channel_ratio)
        stage_3_channels = int(self._detail_branch_channels['stage_3'][0][2] * self._semantic_channel_ratio)
        params = [
            ('stage_1', [('se', 3, stage_1_channels, 1, 4, 1)]),
            ('stage_3', [('ge', 3, stage_3_channels, self._ge_expand_ratio, 2, 1),
                         ('ge', 3, stage_3_channels, self._ge_expand_ratio, 1, 1)]),
            ('stage_4', [('ge', 3, stage_3_channels * 2, self._ge_expand_ratio, 2, 1),
                         ('ge', 3, stage_3_channels * 2, self._ge_expand_ratio, 1, 1)]),
            ('stage_5', [('ge', 3, stage_3_channels * 4, self._ge_expand_ratio, 2, 1),
                         ('ge', 3, stage_3_channels * 4, self._ge_expand_ratio, 1, 3),
                         ('ce', 3, stage_3_channels * 4, self._ge_expand_ratio, 1, 1)])
        ]
        return collections.OrderedDict(params)

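    # Stage parameter tuple layouts consumed by the branch builders below:
    #   detail branch:   (op_name, kernel_size, output_channels, stride, repeat_times)
    #   semantic branch: (op_name, kernel_size, output_channels, expand_ratio, stride, repeat_times)
    # where 'se' is the stem block, 'ge' the gather-expansion block and 'ce'
    # the context-embedding block registered in self._block_maps.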
    def _conv_block(self, input_tensor, k_size, output_channels, stride,
                    name, padding='SAME', use_bias=False, need_activate=False):
        """
        conv block used in bisenet v2
        :param input_tensor:
        :param k_size:
        :param output_channels:
        :param stride:
        :param name:
        :param padding:
        :param use_bias:
        :param need_activate:
        :return:
        """
        with tf.variable_scope(name_or_scope=name):
            result = self.conv2d(
                inputdata=input_tensor,
                out_channel=output_channels,
                kernel_size=k_size,
                padding=padding,
                stride=stride,
                use_bias=use_bias,
                name='conv'
            )
            if need_activate:
                result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
                result = self.relu(inputdata=result, name='relu')
            else:
                result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
        return result

    def build_detail_branch(self, input_tensor, name):
        """

        :param input_tensor:
        :param name:
        :return:
        """
        result = input_tensor
        with tf.variable_scope(name_or_scope=name):
            for stage_name, stage_params in self._detail_branch_channels.items():
                with tf.variable_scope(stage_name):
                    for block_index, param in enumerate(stage_params):
                        block_op = self._block_maps[param[0]]
                        k_size = param[1]
                        output_channels = param[2]
                        stride = param[3]
                        repeat_times = param[4]
                        for repeat_index in range(repeat_times):
                            with tf.variable_scope(name_or_scope='conv_block_{:d}_repeat_{:d}'.format(
                                    block_index + 1, repeat_index + 1)):
                                if stage_name == 'stage_3' and block_index == 1 and repeat_index == 1:
                                    result = block_op(
                                        input_tensor=result,
                                        k_size=k_size,
                                        output_channels=output_channels,
                                        stride=stride,
                                        name='3x3_conv',
                                        padding='SAME',
                                        use_bias=False,
                                        need_activate=False
                                    )
                                else:
                                    result = block_op(
                                        input_tensor=result,
                                        k_size=k_size,
                                        output_channels=output_channels,
                                        stride=stride,
                                        name='3x3_conv',
                                        padding='SAME',
                                        use_bias=False,
                                        need_activate=True
                                    )
        return result

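    # build_semantic_branch walks the stage table from
    # _build_semantic_branch_hyper_params. When prepare_data_for_booster is
    # True, the last stem/gather-expansion output of each stage is additionally
    # routed through a segmentation head whose upsample ratio is derived from
    # the stage output's spatial size relative to the network input, yielding
    # the auxiliary logits used for deep supervision during training.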
    def build_semantic_branch(self, input_tensor, name, prepare_data_for_booster=False):
        """

        :param input_tensor:
        :param name:
        :param prepare_data_for_booster:
        :return:
        """
        seg_head_inputs = collections.OrderedDict()
        result = input_tensor
        source_input_tensor_size = input_tensor.get_shape().as_list()[1:3]
        with tf.variable_scope(name_or_scope=name):
            for stage_name, stage_params in self._semantic_branch_channels.items():
                seg_head_input = input_tensor
                with tf.variable_scope(stage_name):
                    for block_index, param in enumerate(stage_params):
                        block_op_name = param[0]
                        block_op = self._block_maps[block_op_name]
                        output_channels = param[2]
                        expand_ratio = param[3]
                        stride = param[4]
                        repeat_times = param[5]
                        for repeat_index in range(repeat_times):
                            with tf.variable_scope(name_or_scope='{:s}_block_{:d}_repeat_{:d}'.format(
                                    block_op_name, block_index + 1, repeat_index + 1)):
                                if block_op_name == 'ge':
                                    result = block_op(
                                        input_tensor=result,
                                        name='gather_expansion_block',
                                        stride=stride,
                                        e=expand_ratio,
                                        output_channels=output_channels
                                    )
                                    seg_head_input = result
                                elif block_op_name == 'ce':
                                    result = block_op(
                                        input_tensor=result,
                                        name='context_embedding_block'
                                    )
                                elif block_op_name == 'se':
                                    result = block_op(
                                        input_tensor=result,
                                        output_channels=output_channels,
                                        name='stem_block'
                                    )
                                    seg_head_input = result
                                else:
                                    raise NotImplementedError('Unsupported block type: {:s}'.format(block_op_name))
                    if prepare_data_for_booster:
                        result_tensor_size = result.get_shape().as_list()[1:3]
                        result_tensor_dims = result.get_shape().as_list()[-1]
                        upsample_ratio = int(source_input_tensor_size[0] / result_tensor_size[0])
                        feature_dims = result_tensor_dims * self._seg_head_ratio
                        seg_head_inputs[stage_name] = self._seg_head_block(
                            input_tensor=seg_head_input,
                            name='block_{:d}_seg_head_block'.format(block_index + 1),
                            upsample_ratio=upsample_ratio,
                            feature_dims=feature_dims,
                            classes_nums=self._class_nums
                        )
        return result, seg_head_inputs

    def build_aggregation_branch(self, detail_output, semantic_output, name):
        """

        :param detail_output:
        :param semantic_output:
        :param name:
        :return:
        """
        with tf.variable_scope(name_or_scope=name):
            result = self._guided_aggregation_block(
                detail_input_tensor=detail_output,
                semantic_input_tensor=semantic_output,
                name='guided_aggregation_block'
            )
        return result

    def build_instance_segmentation_branch(self, input_tensor, name):
        """

        :param input_tensor:
        :param name:
        :return:
        """
        input_tensor_size = input_tensor.get_shape().as_list()[1:3]
        output_tensor_size = [int(tmp * 8) for tmp in input_tensor_size]

        with tf.variable_scope(name_or_scope=name):
            output_tensor = self._conv_block(
                input_tensor=input_tensor,
                k_size=3,
                output_channels=64,
                stride=1,
                name='conv_3x3',
                use_bias=False,
                need_activate=True
            )
            output_tensor = self._conv_block(
                input_tensor=output_tensor,
                k_size=1,
                output_channels=128,
                stride=1,
                name='conv_1x1',
                use_bias=False,
                need_activate=False
            )
            output_tensor = tf.image.resize_bilinear(
                output_tensor,
                output_tensor_size,
                name='instance_logits'
            )
        return output_tensor

    def build_binary_segmentation_branch(self, input_tensor, name):
        """

        :param input_tensor:
        :param name:
        :return:
        """
        input_tensor_size = input_tensor.get_shape().as_list()[1:3]
        output_tensor_size = [int(tmp * 8) for tmp in input_tensor_size]

        with tf.variable_scope(name_or_scope=name):
            output_tensor = self._conv_block(
                input_tensor=input_tensor,
                k_size=3,
                output_channels=64,
                stride=1,
                name='conv_3x3',
                use_bias=False,
                need_activate=True
            )
            output_tensor = self._conv_block(
                input_tensor=output_tensor,
                k_size=1,
                output_channels=128,
                stride=1,
                name='conv_1x1',
                use_bias=False,
                need_activate=True
            )
            output_tensor = self._conv_block(
                input_tensor=output_tensor,
                k_size=1,
                output_channels=self._class_nums,
                stride=1,
                name='final_conv',
                use_bias=False,
                need_activate=False
            )
            output_tensor = tf.image.resize_bilinear(
                output_tensor,
                output_tensor_size,
                name='binary_logits'
            )
        return output_tensor

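    # build_model wires the full pipeline: input -> detail + semantic branches
    # -> guided aggregation -> two parallel heads. The returned OrderedDict
    # exposes 'binary_segment_logits' (class_nums channels) and
    # 'instance_segment_logits' (128-channel embedding features), both
    # bilinearly upsampled 8x back toward the input resolution and consumed
    # downstream by LaneNet's binary and instance segmentation losses.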
    def build_model(self, input_tensor, name, reuse=False):
        """

        :param input_tensor:
        :param name:
        :param reuse:
        :return:
        """
        with tf.variable_scope(name_or_scope=name, reuse=reuse):
            # build detail branch
            detail_branch_output = self.build_detail_branch(
                input_tensor=input_tensor,
                name='detail_branch'
            )
            # build semantic branch
            semantic_branch_output, _ = self.build_semantic_branch(
                input_tensor=input_tensor,
                name='semantic_branch',
                prepare_data_for_booster=False
            )
            # build aggregation branch
            aggregation_branch_output = self.build_aggregation_branch(
                detail_output=detail_branch_output,
                semantic_output=semantic_branch_output,
                name='aggregation_branch'
            )
            # build binary and instance segmentation branch
            binary_seg_branch_output = self.build_binary_segmentation_branch(
                input_tensor=aggregation_branch_output,
                name='binary_segmentation_branch'
            )
            instance_seg_branch_output = self.build_instance_segmentation_branch(
                input_tensor=aggregation_branch_output,
                name='instance_segmentation_branch'
            )
            # gather frontend output result
            self._net_intermediate_results['binary_segment_logits'] = {
                'data': binary_seg_branch_output,
                'shape': binary_seg_branch_output.get_shape().as_list()
            }
            self._net_intermediate_results['instance_segment_logits'] = {
                'data': instance_seg_branch_output,
                'shape': instance_seg_branch_output.get_shape().as_list()
            }
        return self._net_intermediate_results


if __name__ == '__main__':
    """
    test code
    """
    test_in_tensor = tf.placeholder(dtype=tf.float32, shape=[1, 256, 512, 3], name='input')
    model = BiseNetV2(phase='train', cfg=parse_config_utils.lanenet_cfg)
    ret = model.build_model(test_in_tensor, name='bisenetv2')
    for layer_name, layer_info in ret.items():
        print('layer name: {:s} shape: {}'.format(layer_name, layer_info['shape']))

--------------------------------------------------------------------------------