├── src
│   └── LaneDetection
│       ├── models
│       │   ├── __pycache__
│       │   │   └── laneNet_class.cpython-37.pyc
│       │   ├── LaneNet
│       │   │   ├── lanenet_front_end.py
│       │   │   ├── tusimple_lanenet.yaml
│       │   │   ├── lanenet.py
│       │   │   ├── lanenet_discriminative_loss.py
│       │   │   ├── parse_config_utils.py
│       │   │   ├── lanenet_back_end.py
│       │   │   ├── vgg16_based_fcn.py
│       │   │   ├── lanenet_postprocess.py
│       │   │   ├── cnn_basenet.py
│       │   │   └── bisenet_v2.py
│       │   └── laneNet_class.py
│       ├── readMe.md
│       ├── read_img.py
│       └── lane_detection_publisher.py
├── setup.py
├── README.md
├── .gitignore
├── package.xml
├── CMakeLists.txt
└── LICENSE
/src/LaneDetection/models/__pycache__/laneNet_class.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Erendrgnl/Carla-Ros-Lane-Keeping-System/HEAD/src/LaneDetection/models/__pycache__/laneNet_class.cpython-37.pyc
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from catkin_pkg.python_setup import generate_distutils_setup
3 |
4 | d = generate_distutils_setup(
5 |     packages=['LaneDetection',],
6 |     package_dir={'': 'src'}
7 | )
8 | setup(**d)
9 |
--------------------------------------------------------------------------------
/src/LaneDetection/readMe.md:
--------------------------------------------------------------------------------
1 | # Lane Detection
2 |
3 | ## Implemented Models
4 |
5 | Model implementation files live under `models/`.
6 |
7 | ## LaneNet
8 | [paper](https://arxiv.org/abs/1802.05591)
9 | [reference implementation](https://github.com/MaybeShewill-CV/lanenet-lane-detection)
10 |
11 | To run inference with the LaneNet model, download the pretrained weights from the link below and copy the files into `models/LaneNet/weights/`.
12 |
13 | [weights](https://www.dropbox.com/sh/0b6r0ljqi76kyg9/AADedYWO3bnx4PhK1BmbJkJKa?dl=0)
14 |
15 |
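16 | A minimal sketch to check that the checkpoint landed in the right place (filenames taken from this repo's `.gitignore`):
17 |
18 | ```python
19 | import os
20 |
21 | weights_dir = os.path.join("models", "LaneNet", "weights")
22 | expected = [
23 |     "checkpoint",
24 |     "tusimple_lanenet.ckpt.data-00000-of-00001",
25 |     "tusimple_lanenet.ckpt.index",
26 |     "tusimple_lanenet.ckpt.meta",
27 | ]
28 | for name in expected:
29 |     path = os.path.join(weights_dir, name)
30 |     print("{:55s} {}".format(path, "OK" if os.path.isfile(path) else "MISSING"))
31 | ```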
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Carla Ros Lane Keeping System
2 |
3 | Use the Carla simulator and the ROS bridge to implement a lane keeping system.
4 |
5 | 
6 |
7 | ## Environment
8 |
9 | Tested on CARLA 0.9.12. See the following link for this Carla release:
10 | [link](https://github.com/carla-simulator/carla/releases/tag/0.9.12)
11 |
12 | ROS bridge:
13 | [link](https://github.com/carla-simulator/ros-bridge)
14 |
15 |
16 | ### What will be added
17 | - [✓] Lane detection algorithm
18 | - [✓] ROS nodes updated
19 | - [ ] MPC controller implementation
20 | - [ ] Kalman filter
21 |
22 |
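23 | ### Nodes
24 |
25 | The detector node (`src/LaneDetection/lane_detection_publisher.py`) subscribes to `/carla/ego_vehicle/rgb_front/image` and publishes a lane overlay on `/lka/detected_image`. A minimal standalone viewer for that topic (a sketch; assumes 4-channel BGRA frames, which is what the node publishes):
26 |
27 | ```python
28 | import cv2
29 | import numpy as np
30 | import rospy
31 | from sensor_msgs.msg import Image
32 |
33 | def show(msg):
34 |     # Raw bytes -> HxWx4 array, then drop alpha for display.
35 |     img = np.frombuffer(msg.data, dtype=np.uint8).reshape((msg.height, msg.width, 4))
36 |     cv2.imshow("lka", cv2.cvtColor(img, cv2.COLOR_BGRA2BGR))
37 |     cv2.waitKey(1)
38 |
39 | rospy.init_node("lka_viewer", anonymous=True)
40 | rospy.Subscriber("/lka/detected_image", Image, show)
41 | rospy.spin()
42 | ```
43 |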
--------------------------------------------------------------------------------
/src/LaneDetection/read_img.py:
--------------------------------------------------------------------------------
1 | import rospy
2 | import numpy as np
3 | from sensor_msgs.msg import Image
4 | import cv2
5 |
6 |
7 | def callback(image):
8 |     # Raw byte buffer -> HxWx4 numpy array (Carla camera frames are BGRA).
9 |     byte_image = image.data
10 |     np_image = np.frombuffer(byte_image, dtype=np.uint8)
11 |     bgra_image = np_image.reshape((image.height, image.width, 4))
12 |     bgr_image = cv2.cvtColor(bgra_image, cv2.COLOR_BGRA2BGR)
13 |
14 |     cv2.imshow("Camera Front", bgr_image)
15 |     cv2.waitKey(10)
16 |
17 |
18 | if __name__ == "__main__":
19 |     rospy.init_node('camera', anonymous=True)
20 |     rospy.Subscriber("/lka/detected_image", Image, callback)
21 |
22 |     # rospy.spin() blocks on its own; the original wrapped it in a Thread and
23 |     # called it instead of passing the callable, which was a bug.
24 |     rospy.spin()
25 |
--------------------------------------------------------------------------------
/src/LaneDetection/models/LaneNet/lanenet_front_end.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # @Time : 19-4-24 下午3:53
4 | # @Author : MaybeShewill-CV
5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection
6 | # @File : lanenet_front_end.py
7 | # @IDE: PyCharm
8 | """
9 | LaneNet frontend branch which is mainly used for feature extraction
10 | """
11 | import cnn_basenet
12 | import vgg16_based_fcn
13 | import bisenet_v2
14 |
15 |
16 | class LaneNetFrondEnd(cnn_basenet.CNNBaseModel):
17 |     """
18 |     LaneNet frontend which is used to extract image features for following process
19 |     """
20 |     def __init__(self, phase, net_flag, cfg):
21 |         """
22 |
23 |         """
24 |         super(LaneNetFrondEnd, self).__init__()
25 |         self._cfg = cfg
26 |
27 |         self._frontend_net_map = {
28 |             'vgg': vgg16_based_fcn.VGG16FCN(phase=phase, cfg=self._cfg),
29 |             'bisenetv2': bisenet_v2.BiseNetV2(phase=phase, cfg=self._cfg),
30 |         }
31 |
32 |         self._net = self._frontend_net_map[net_flag]
33 |
34 |     def build_model(self, input_tensor, name, reuse):
35 |         """
36 |
37 |         :param input_tensor:
38 |         :param name:
39 |         :param reuse:
40 |         :return:
41 |         """
42 |         return self._net.build_model(
43 |             input_tensor=input_tensor,
44 |             name=name,
45 |             reuse=reuse
46 |         )
47 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/settings.json
2 | .vscode/c_cpp_properties.json
3 | src/LaneDetection/models/LaneNet/__pycache__/bisenet_v2.cpython-37.pyc
4 | src/LaneDetection/models/LaneNet/__pycache__/cnn_basenet.cpython-37.pyc
5 | src/LaneDetection/models/LaneNet/__pycache__/inferance.cpython-37.pyc
6 | src/LaneDetection/models/LaneNet/__pycache__/lanenet_back_end.cpython-37.pyc
7 | src/LaneDetection/models/LaneNet/__pycache__/lanenet_discriminative_loss.cpython-37.pyc
8 | src/LaneDetection/models/LaneNet/__pycache__/lanenet_front_end.cpython-37.pyc
9 | src/LaneDetection/models/LaneNet/__pycache__/lanenet.cpython-37.pyc
10 | src/LaneDetection/models/LaneNet/__pycache__/parse_config_utils.cpython-37.pyc
11 | src/LaneDetection/models/LaneNet/__pycache__/vgg16_based_fcn.cpython-37.pyc
12 | src/LaneDetection/models/LaneNet/weights/checkpoint
13 | src/LaneDetection/models/LaneNet/weights/tusimple_lanenet.ckpt.data-00000-of-00001
14 | src/LaneDetection/models/LaneNet/weights/tusimple_lanenet.ckpt.index
15 | src/LaneDetection/models/LaneNet/weights/tusimple_lanenet.ckpt.meta
16 | src/LaneDetection/models/__pycache__/laneNet_class.cpython-37.pyc
17 | src/LaneDetection/__pycache__/lane_detection_publisher.cpython-37.pyc
18 |
--------------------------------------------------------------------------------
/src/LaneDetection/lane_detection_publisher.py:
--------------------------------------------------------------------------------
1 | import rospy
2 | import numpy as np
3 | from sensor_msgs.msg import Image
4 | import cv2
5 |
6 |
7 | class LaneDetection(object):
8 |     def __init__(self, model):
9 |         self.model = model
10 |         rospy.init_node('camera', anonymous=True)
11 |         rospy.Subscriber("/carla/ego_vehicle/rgb_front/image", Image, self.callback)
12 |         self.pub = rospy.Publisher("/lka/detected_image", Image, queue_size=10)
13 |         rospy.spin()
14 |
15 |     def callback(self, raw_image):
16 |         # Carla publishes BGRA frames; convert to RGB for the model.
17 |         byte_image = raw_image.data
18 |         np_image = np.frombuffer(byte_image, dtype=np.uint8)
19 |         bgra_image = np_image.reshape((raw_image.height, raw_image.width, 4))
20 |         rgb_image = cv2.cvtColor(bgra_image, cv2.COLOR_BGRA2RGB)
21 |
22 |         publish_image = Image()
23 |         publish_image.header = raw_image.header
24 |         publish_image.is_bigendian = raw_image.is_bigendian
25 |         publish_image.encoding = raw_image.encoding
26 |
27 |         prediction, lane_center = self.model.predict(rgb_image)
28 |         publish_image.height = prediction.shape[0]
29 |         publish_image.width = prediction.shape[1]
30 |         # Row stride in bytes (4 channels for the BGRA overlay); the original
31 |         # left this unset, which confuses cv_bridge-style consumers.
32 |         publish_image.step = prediction.shape[1] * 4
33 |
34 |         prediction = cv2.cvtColor(prediction, cv2.COLOR_RGB2BGRA).astype(np.uint8)
35 |         publish_image.data = prediction.tobytes()
36 |
37 |         self.pub.publish(publish_image)
38 |
39 |
40 | if __name__ == "__main__":
41 |     from LaneDetection.models import laneNet_class
42 |
43 |     model = laneNet_class.LaneNet()
44 |     ros_node = LaneDetection(model)
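45 |
46 | # Usage sketch (assumptions, not verified here): with Carla and
47 | # carla_ros_bridge running, the ego vehicle's front camera publishes on
48 | # /carla/ego_vehicle/rgb_front/image; start this node, then view the
49 | # published overlay with src/LaneDetection/read_img.py, which subscribes
50 | # to /lka/detected_image.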
--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <package format="2">
3 |   <name>lka</name>
4 |   <version>0.0.0</version>
5 |   <description>The lka package</description>
6 |
7 |   <maintainer email="eren@todo.todo">eren</maintainer>
8 |
9 |   <license>TODO</license>
10 |
11 |   <buildtool_depend>catkin</buildtool_depend>
12 |   <build_depend>roscpp</build_depend>
13 |   <build_depend>rospy</build_depend>
14 |   <build_depend>std_msgs</build_depend>
15 |   <build_export_depend>roscpp</build_export_depend>
16 |   <build_export_depend>rospy</build_export_depend>
17 |   <build_export_depend>std_msgs</build_export_depend>
18 |   <exec_depend>roscpp</exec_depend>
19 |   <exec_depend>rospy</exec_depend>
20 |   <exec_depend>std_msgs</exec_depend>
21 |
22 |   <export>
23 |   </export>
24 | </package>
--------------------------------------------------------------------------------
/src/LaneDetection/models/LaneNet/tusimple_lanenet.yaml:
--------------------------------------------------------------------------------
1 | AUG:
2 |     RESIZE_METHOD: 'stepscaling'  # choice unpadding rangescaling and stepscaling
3 |     FIX_RESIZE_SIZE: [720, 720]  # (width, height), for unpadding
4 |     INF_RESIZE_VALUE: 500  # for rangescaling
5 |     MAX_RESIZE_VALUE: 600  # for rangescaling
6 |     MIN_RESIZE_VALUE: 400  # for rangescaling
7 |     MAX_SCALE_FACTOR: 2.0  # for stepscaling
8 |     MIN_SCALE_FACTOR: 0.75  # for stepscaling
9 |     SCALE_STEP_SIZE: 0.25  # for stepscaling
10 |     TRAIN_CROP_SIZE: [512, 256]  # crop size for training
11 |     EVAL_CROP_SIZE: [512, 256]  # crop size for evaluating
12 |     CROP_PAD_SIZE: 32
13 |     MIRROR: True
14 |     FLIP: False
15 |     FLIP_RATIO: 0.5
16 |     RICH_CROP:
17 |         ENABLE: False
18 |         BLUR: True
19 |         BLUR_RATIO: 0.2
20 |         MAX_ROTATION: 15
21 |         MIN_AREA_RATIO: 0.5
22 |         ASPECT_RATIO: 0.5
23 |         BRIGHTNESS_JITTER_RATIO: 0.5
24 |         CONTRAST_JITTER_RATIO: 0.5
25 |         SATURATION_JITTER_RATIO: 0.5
26 | DATASET:
27 |     DATA_DIR: 'REPO_ROOT_PATH/data/training_data_example/'
28 |     IMAGE_TYPE: 'rgb'  # choice rgb or rgba
29 |     NUM_CLASSES: 2
30 |     TEST_FILE_LIST: 'REPO_ROOT_PATH/data/training_data_example/test.txt'
31 |     TRAIN_FILE_LIST: 'REPO_ROOT_PATH/data/training_data_example/train.txt'
32 |     VAL_FILE_LIST: 'REPO_ROOT_PATH/data/training_data_example/val.txt'
33 |     IGNORE_INDEX: 255
34 |     PADDING_VALUE: [127.5, 127.5, 127.5]
35 |     MEAN_VALUE: [0.5, 0.5, 0.5]
36 |     STD_VALUE: [0.5, 0.5, 0.5]
37 |     CPU_MULTI_PROCESS_NUMS: 8
38 | FREEZE:
39 |     MODEL_FILENAME: 'model'
40 |     PARAMS_FILENAME: 'params'
41 | MODEL:
42 |     MODEL_NAME: 'lanenet'
43 |     FRONT_END: 'bisenetv2'
44 |     EMBEDDING_FEATS_DIMS: 4
45 |     BISENETV2:
46 |         GE_EXPAND_RATIO: 6
47 |         SEMANTIC_CHANNEL_LAMBDA: 0.25
48 |         SEGHEAD_CHANNEL_EXPAND_RATIO: 2
49 | TEST:
50 |     TEST_MODEL: 'model/cityscapes/final'
51 | TRAIN:
52 |     MODEL_SAVE_DIR: 'model/tusimple/'
53 |     TBOARD_SAVE_DIR: 'tboard/tusimple/'
54 |     MODEL_PARAMS_CONFIG_FILE_NAME: "model_train_config.json"
55 |     RESTORE_FROM_SNAPSHOT:
56 |         ENABLE: False
57 |         SNAPSHOT_PATH: ''
58 |     SNAPSHOT_EPOCH: 8
59 |     BATCH_SIZE: 32
60 |     VAL_BATCH_SIZE: 4
61 |     EPOCH_NUMS: 905
62 |     WARM_UP:
63 |         ENABLE: True
64 |         EPOCH_NUMS: 8
65 |     FREEZE_BN:
66 |         ENABLE: False
67 |     COMPUTE_MIOU:
68 |         ENABLE: True
69 |         EPOCH: 1
70 |     MULTI_GPU:
71 |         ENABLE: True
72 |         GPU_DEVICES: ['0', '1']
73 |         CHIEF_DEVICE_INDEX: 0
74 | SOLVER:
75 |     LR: 0.001
76 |     LR_POLICY: 'poly'
77 |     LR_POLYNOMIAL_POWER: 0.9
78 |     OPTIMIZER: 'sgd'
79 |     MOMENTUM: 0.9
80 |     WEIGHT_DECAY: 0.0005
81 |     MOVING_AVE_DECAY: 0.9995
82 |     LOSS_TYPE: 'cross_entropy'
83 |     OHEM:
84 |         ENABLE: False
85 |         SCORE_THRESH: 0.65
86 |         MIN_SAMPLE_NUMS: 65536
87 | GPU:
88 |     GPU_MEMORY_FRACTION: 0.9
89 |     TF_ALLOW_GROWTH: True
90 | POSTPROCESS:
91 |     MIN_AREA_THRESHOLD: 100
92 |     DBSCAN_EPS: 0.35
93 |     DBSCAN_MIN_SAMPLES: 1000
94 | LOG:
95 |     SAVE_DIR: './log'
96 |     LEVEL: INFO
97 |
--------------------------------------------------------------------------------
/src/LaneDetection/models/LaneNet/lanenet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # @Time : 19-4-24 下午8:50
4 | # @Author : MaybeShewill-CV
5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection
6 | # @File : lanenet.py
7 | # @IDE: PyCharm
8 | """
9 | Implement LaneNet Model
10 | """
11 | import tensorflow as tf
12 |
13 | import lanenet_back_end
14 | import lanenet_front_end
15 | import cnn_basenet
16 |
17 |
18 | class LaneNet(cnn_basenet.CNNBaseModel):
19 |     """
20 |
21 |     """
22 |     def __init__(self, phase, cfg):
23 |         """
24 |
25 |         """
26 |         super(LaneNet, self).__init__()
27 |         self._cfg = cfg
28 |         self._net_flag = self._cfg.MODEL.FRONT_END
29 |
30 |         self._frontend = lanenet_front_end.LaneNetFrondEnd(
31 |             phase=phase, net_flag=self._net_flag, cfg=self._cfg
32 |         )
33 |         self._backend = lanenet_back_end.LaneNetBackEnd(
34 |             phase=phase, cfg=self._cfg
35 |         )
36 |
37 |     def inference(self, input_tensor, name, reuse=False):
38 |         """
39 |
40 |         :param input_tensor:
41 |         :param name:
42 |         :param reuse
43 |         :return:
44 |         """
45 |         with tf.variable_scope(name_or_scope=name, reuse=reuse):
46 |             # first extract image features
47 |             extract_feats_result = self._frontend.build_model(
48 |                 input_tensor=input_tensor,
49 |                 name='{:s}_frontend'.format(self._net_flag),
50 |                 reuse=reuse
51 |             )
52 |
53 |             # second apply backend process
54 |             binary_seg_prediction, instance_seg_prediction = self._backend.inference(
55 |                 binary_seg_logits=extract_feats_result['binary_segment_logits']['data'],
56 |                 instance_seg_logits=extract_feats_result['instance_segment_logits']['data'],
57 |                 name='{:s}_backend'.format(self._net_flag),
58 |                 reuse=reuse
59 |             )
60 |
61 |         return binary_seg_prediction, instance_seg_prediction
62 |
63 |     def compute_loss(self, input_tensor, binary_label, instance_label, name, reuse=False):
64 |         """
65 |         calculate lanenet loss for training
66 |         :param input_tensor:
67 |         :param binary_label:
68 |         :param instance_label:
69 |         :param name:
70 |         :param reuse:
71 |         :return:
72 |         """
73 |         with tf.variable_scope(name_or_scope=name, reuse=reuse):
74 |             # first extract image features
75 |             extract_feats_result = self._frontend.build_model(
76 |                 input_tensor=input_tensor,
77 |                 name='{:s}_frontend'.format(self._net_flag),
78 |                 reuse=reuse
79 |             )
80 |
81 |             # second apply backend process
82 |             calculated_losses = self._backend.compute_loss(
83 |                 binary_seg_logits=extract_feats_result['binary_segment_logits']['data'],
84 |                 binary_label=binary_label,
85 |                 instance_seg_logits=extract_feats_result['instance_segment_logits']['data'],
86 |                 instance_label=instance_label,
87 |                 name='{:s}_backend'.format(self._net_flag),
88 |                 reuse=reuse
89 |             )
90 |
91 |         return calculated_losses
92 |
--------------------------------------------------------------------------------
/src/LaneDetection/models/laneNet_class.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import tensorflow as tf
4 | import os
5 | import sys
6 |
7 | ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
8 | sys.path.append(os.path.join(ROOT_PATH,"LaneNet"))
9 |
10 | import lanenet
11 | #from lanenet_model import lanenet_postprocess
12 | import parse_config_utils
13 |
14 |
15 | class LaneNet(object):
16 |     def __init__(self):
17 |         self.cfg = parse_config_utils.lanenet_cfg
18 |         self.input_tensor = tf.placeholder(dtype=tf.float32, shape=[1, 256, 512, 3], name='input_tensor')
19 |         self.net = lanenet.LaneNet(phase='test', cfg=self.cfg)
20 |         self.binary_seg_ret, self.instance_seg_ret = self.net.inference(input_tensor=self.input_tensor, name='LaneNet')
21 |
22 |         self.weights_path = os.path.join(ROOT_PATH, "LaneNet", "weights/tusimple_lanenet.ckpt")
23 |
24 |         # Set sess configuration
25 |         sess_config = tf.ConfigProto()
26 |         sess_config.gpu_options.per_process_gpu_memory_fraction = self.cfg.GPU.GPU_MEMORY_FRACTION
27 |         sess_config.gpu_options.allow_growth = self.cfg.GPU.TF_ALLOW_GROWTH
28 |         sess_config.gpu_options.allocator_type = 'BFC'
29 |
30 |         self.sess = tf.Session(config=sess_config)
31 |
32 |         # define moving average version of the learned variables for eval
33 |         with tf.variable_scope(name_or_scope='moving_avg'):
34 |             variable_averages = tf.train.ExponentialMovingAverage(
35 |                 self.cfg.SOLVER.MOVING_AVE_DECAY)
36 |             variables_to_restore = variable_averages.variables_to_restore()
37 |
38 |         self.saver = tf.train.Saver(variables_to_restore)
39 |         self.saver.restore(sess=self.sess, save_path=self.weights_path)
40 |
41 |         print("LaneNet Model Initialized")
42 |
43 |     @staticmethod
44 |     def preProcessing(image):
45 |         # Resize to the network input size and scale pixels to [-1, 1].
46 |         image = cv2.resize(image, (512, 256), interpolation=cv2.INTER_LINEAR)
47 |         image = image / 127.5 - 1.0
48 |         return image
49 |
50 |     def predict(self, image):
51 |         src_image = self.preProcessing(image)
52 |
53 |         with self.sess.as_default():
54 |             self.binary_seg_image, self.instance_seg_image = self.sess.run(
55 |                 [self.binary_seg_ret, self.instance_seg_ret],
56 |                 feed_dict={self.input_tensor: [src_image]}
57 |             )
58 |             # Mask the instance embedding with the binary lane mask.
59 |             rgb = self.instance_seg_image[0].astype(np.uint8)
60 |             bw = self.binary_seg_image[0].astype(np.uint8)
61 |             res = cv2.bitwise_and(rgb, rgb, mask=bw)
62 |
63 |         lanes_rgb, center_xy = self.postProcess(res)
64 |         return lanes_rgb, center_xy
65 |
66 |     def postProcess(self, image):
67 |         src_img = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
68 |
69 |         # Drop pixels where the red channel dominates before splitting by color.
70 |         red_mask = (src_img[:, :, 2] > 200).astype(np.uint8)
71 |         src_img = cv2.bitwise_and(src_img, src_img, mask=1 - red_mask)
72 |
73 |         # Right lanes
74 |         green_mask = (src_img[:, :, 1] > 200).astype(np.uint8)
75 |         green_area = cv2.bitwise_and(src_img, src_img, mask=green_mask)
76 |
77 |         # Left lanes
78 |         blue_mask = (src_img[:, :, 0] > 200).astype(np.uint8)
79 |         blue_area = cv2.bitwise_and(src_img, src_img, mask=blue_mask)
80 |
81 |         lanes_rgb = cv2.addWeighted(green_area, 1, blue_area, 1, 0)
82 |
83 |         img_center_point, center_xy = self.window_search(green_mask, blue_mask)
84 |         lanes_rgb = cv2.addWeighted(lanes_rgb, 1, img_center_point, 1, 0)
85 |
86 |         return lanes_rgb, center_xy
87 |
88 |     @staticmethod
89 |     def window_search(right_lane, left_lane):
90 |         center_coordinates = []
91 |         out = np.zeros(right_lane.shape, np.uint8)
92 |         out = cv2.merge((out, out, out))
93 |
94 |         mid_point = right_lane.shape[1] // 2
95 |
96 |         # Slide fixed-height windows up the lower image half and take the
97 |         # histogram peak of each lane mask per window.
98 |         nwindows = 9
99 |         h = right_lane.shape[0]
100 |         vp = int(h / 2)
101 |         window_height = vp // nwindows
102 |
103 |         r_lane = right_lane[vp:, :].copy()
104 |         r_lane = cv2.erode(r_lane, np.ones((3, 3)))
105 |
106 |         l_lane = left_lane[vp:, :]
107 |         l_lane = cv2.erode(l_lane, np.ones((3, 3)))
108 |
109 |         for window in range(nwindows):
110 |             win_y_low = vp - (window + 1) * window_height
111 |             win_y_high = vp - window * window_height
112 |             win_y_center = win_y_low + int((win_y_high - win_y_low) / 2)
113 |
114 |             r_row = r_lane[win_y_low:win_y_high, :]
115 |             l_row = l_lane[win_y_low:win_y_high, :]
116 |
117 |             histogram = np.sum(r_row, axis=0)
118 |             r_point = np.argmax(histogram)
119 |
120 |             histogram = np.sum(l_row, axis=0)
121 |             l_point = np.argmax(histogram)
122 |
123 |             # Keep a midpoint only when both lanes are detected and their
124 |             # offsets from the image center roughly agree.
125 |             if (l_point != 0) and (r_point != 0):
126 |                 rd = r_point - mid_point
127 |                 ld = mid_point - l_point
128 |                 if abs(rd - ld) < 100:
129 |                     center = l_point + int((r_point - l_point) / 2)
130 |                     out = cv2.circle(out, (center, vp + win_y_center), 2, (0, 0, 255), -1)
131 |                     center_coordinates.append((center, vp + win_y_center))
132 |         return out, center_coordinates
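133 |
134 |
135 | # Minimal usage sketch (assumes TensorFlow 1.x and the checkpoint described
136 | # in src/LaneDetection/readMe.md; `frame` is any HxWx3 RGB uint8 image):
137 | #   model = LaneNet()
138 | #   overlay, centers = model.predict(frame)
139 | #   # `overlay` visualizes the detected lanes; `centers` is a list of (x, y)
140 | #   # lane-center points found by window_search.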
--------------------------------------------------------------------------------
/src/LaneDetection/models/LaneNet/lanenet_discriminative_loss.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # @Time : 18-5-11 下午3:48
4 | # @Author : MaybeShewill-CV
5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection
6 | # @File : lanenet_discriminative_loss.py
7 | # @IDE: PyCharm Community Edition
8 | """
9 | Discriminative Loss for instance segmentation
10 | """
11 | import tensorflow as tf
12 |
13 |
14 | def discriminative_loss_single(
15 |         prediction,
16 |         correct_label,
17 |         feature_dim,
18 |         label_shape,
19 |         delta_v,
20 |         delta_d,
21 |         param_var,
22 |         param_dist,
23 |         param_reg):
24 |     """
25 |     discriminative loss
26 |     :param prediction: inference of network
27 |     :param correct_label: instance label
28 |     :param feature_dim: feature dimension of prediction
29 |     :param label_shape: shape of label
30 |     :param delta_v: cut off variance distance
31 |     :param delta_d: cut off cluster distance
32 |     :param param_var: weight for intra cluster variance
33 |     :param param_dist: weight for inter cluster distances
34 |     :param param_reg: weight regularization
35 |     """
36 |     correct_label = tf.reshape(
37 |         correct_label, [label_shape[1] * label_shape[0]]
38 |     )
39 |     reshaped_pred = tf.reshape(
40 |         prediction, [label_shape[1] * label_shape[0], feature_dim]
41 |     )
42 |
43 |     # calculate instance nums
44 |     unique_labels, unique_id, counts = tf.unique_with_counts(correct_label)
45 |     counts = tf.cast(counts, tf.float32)
46 |     num_instances = tf.size(unique_labels)
47 |
48 |     # calculate instance pixel embedding mean vec
49 |     segmented_sum = tf.unsorted_segment_sum(
50 |         reshaped_pred, unique_id, num_instances)
51 |     mu = tf.div(segmented_sum, tf.reshape(counts, (-1, 1)))
52 |     mu_expand = tf.gather(mu, unique_id)
53 |
54 |     distance = tf.norm(tf.subtract(mu_expand, reshaped_pred), axis=1, ord=1)
55 |     distance = tf.subtract(distance, delta_v)
56 |     distance = tf.clip_by_value(distance, 0., distance)
57 |     distance = tf.square(distance)
58 |
59 |     l_var = tf.unsorted_segment_sum(distance, unique_id, num_instances)
60 |     l_var = tf.div(l_var, counts)
61 |     l_var = tf.reduce_sum(l_var)
62 |     l_var = tf.divide(l_var, tf.cast(num_instances, tf.float32))
63 |
64 |     mu_interleaved_rep = tf.tile(mu, [num_instances, 1])
65 |     mu_band_rep = tf.tile(mu, [1, num_instances])
66 |     mu_band_rep = tf.reshape(
67 |         mu_band_rep,
68 |         (num_instances * num_instances, feature_dim))
69 |
70 |     mu_diff = tf.subtract(mu_band_rep, mu_interleaved_rep)
71 |
72 |     intermediate_tensor = tf.reduce_sum(tf.abs(mu_diff), axis=1)
73 |     zero_vector = tf.zeros(1, dtype=tf.float32)
74 |     bool_mask = tf.not_equal(intermediate_tensor, zero_vector)
75 |     mu_diff_bool = tf.boolean_mask(mu_diff, bool_mask)
76 |
77 |     mu_norm = tf.norm(mu_diff_bool, axis=1, ord=1)
78 |     mu_norm = tf.subtract(2. * delta_d, mu_norm)
79 |     mu_norm = tf.clip_by_value(mu_norm, 0., mu_norm)
80 |     mu_norm = tf.square(mu_norm)
81 |
82 |     l_dist = tf.reduce_mean(mu_norm)
83 |
84 |     l_reg = tf.reduce_mean(tf.norm(mu, axis=1, ord=1))
85 |
86 |     param_scale = 1.
87 |     l_var = param_var * l_var
88 |     l_dist = param_dist * l_dist
89 |     l_reg = param_reg * l_reg
90 |
91 |     loss = param_scale * (l_var + l_dist + l_reg)
92 |
93 |     return loss, l_var, l_dist, l_reg
94 |
95 |
96 | def discriminative_loss(prediction, correct_label, feature_dim, image_shape,
97 |                         delta_v, delta_d, param_var, param_dist, param_reg):
98 |     """
99 |
100 |     :return: discriminative loss and its three components
101 |     """
102 |
103 |     def cond(label, batch, out_loss, out_var, out_dist, out_reg, i):
104 |         return tf.less(i, tf.shape(batch)[0])
105 |
106 |     def body(label, batch, out_loss, out_var, out_dist, out_reg, i):
107 |         disc_loss, l_var, l_dist, l_reg = discriminative_loss_single(
108 |             prediction[i], correct_label[i], feature_dim, image_shape,
109 |             delta_v, delta_d, param_var, param_dist, param_reg)
110 |
111 |         out_loss = out_loss.write(i, disc_loss)
112 |         out_var = out_var.write(i, l_var)
113 |         out_dist = out_dist.write(i, l_dist)
114 |         out_reg = out_reg.write(i, l_reg)
115 |
116 |         return label, batch, out_loss, out_var, out_dist, out_reg, i + 1
117 |
118 |     # TensorArray is a data structure that support dynamic writing
119 |     output_ta_loss = tf.TensorArray(
120 |         dtype=tf.float32, size=0, dynamic_size=True)
121 |     output_ta_var = tf.TensorArray(
122 |         dtype=tf.float32, size=0, dynamic_size=True)
123 |     output_ta_dist = tf.TensorArray(
124 |         dtype=tf.float32, size=0, dynamic_size=True)
125 |     output_ta_reg = tf.TensorArray(
126 |         dtype=tf.float32, size=0, dynamic_size=True)
127 |
128 |     _, _, out_loss_op, out_var_op, out_dist_op, out_reg_op, _ = tf.while_loop(
129 |         cond, body, [
130 |             correct_label, prediction, output_ta_loss, output_ta_var,
131 |             output_ta_dist, output_ta_reg, 0])
132 |     out_loss_op = out_loss_op.stack()
133 |     out_var_op = out_var_op.stack()
134 |     out_dist_op = out_dist_op.stack()
135 |     out_reg_op = out_reg_op.stack()
136 |
137 |     disc_loss = tf.reduce_mean(out_loss_op)
138 |     l_var = tf.reduce_mean(out_var_op)
139 |     l_dist = tf.reduce_mean(out_dist_op)
140 |     l_reg = tf.reduce_mean(out_reg_op)
141 |
142 |     return disc_loss, l_var, l_dist, l_reg
143 |
--------------------------------------------------------------------------------
/src/LaneDetection/models/LaneNet/parse_config_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # @Time : 2019/12/13 上午11:17
4 | # @Author : PaddlePaddle
5 | # @Site : https://github.com/PaddlePaddle/PaddleSeg
6 | # @File : parse_config_utils.py
7 | # @IDE: PyCharm
8 | """
9 | Parse config utils
10 | """
11 | import os
12 | import yaml
13 | import json
14 | import codecs
15 | from ast import literal_eval
16 |
17 |
18 | class Config(dict):
19 |     """
20 |     Config class
21 |     """
22 |     def __init__(self, *args, **kwargs):
23 |         """
24 |         init class
25 |         :param args:
26 |         :param kwargs:
27 |         """
28 |         if 'config_path' in kwargs:
29 |             config_content = self._load_config_file(kwargs['config_path'])
30 |             super(Config, self).__init__(config_content)
31 |         else:
32 |             super(Config, self).__init__(*args, **kwargs)
33 |         self.immutable = False
34 |
35 |     def __setattr__(self, key, value, create_if_not_exist=True):
36 |         """
37 |
38 |         :param key:
39 |         :param value:
40 |         :param create_if_not_exist:
41 |         :return:
42 |         """
43 |         if key in ["immutable"]:
44 |             self.__dict__[key] = value
45 |             return
46 |
47 |         t = self
48 |         keylist = key.split(".")
49 |         for k in keylist[:-1]:
50 |             t = t.__getattr__(k, create_if_not_exist)
51 |
52 |         t.__getattr__(keylist[-1], create_if_not_exist)
53 |         t[keylist[-1]] = value
54 |
55 |     def __getattr__(self, key, create_if_not_exist=True):
56 |         """
57 |
58 |         :param key:
59 |         :param create_if_not_exist:
60 |         :return:
61 |         """
62 |         if key in ["immutable"]:
63 |             return self.__dict__[key]
64 |
65 |         if key not in self:
66 |             if not create_if_not_exist:
67 |                 raise KeyError
68 |             self[key] = Config()
69 |         if isinstance(self[key], dict):
70 |             self[key] = Config(self[key])
71 |         return self[key]
72 |
73 |     def __setitem__(self, key, value):
74 |         """
75 |
76 |         :param key:
77 |         :param value:
78 |         :return:
79 |         """
80 |         if self.immutable:
81 |             raise AttributeError(
82 |                 'Attempted to set "{}" to "{}", but SegConfig is immutable'.
83 |                 format(key, value))
84 |         #
85 |         if isinstance(value, str):
86 |             try:
87 |                 value = literal_eval(value)
88 |             except ValueError:
89 |                 pass
90 |             except SyntaxError:
91 |                 pass
92 |         super(Config, self).__setitem__(key, value)
93 |
94 |     @staticmethod
95 |     def _load_config_file(config_file_path):
96 |         """
97 |
98 |         :param config_file_path:
99 |         :return:
100 |         """
101 |         if not os.access(config_file_path, os.R_OK):
102 |             raise OSError('Config file: {:s}, can not be read'.format(config_file_path))
103 |         with open(config_file_path, 'r') as f:
104 |             config_content = yaml.safe_load(f)
105 |
106 |         return config_content
107 |
108 |     def update_from_config(self, other):
109 |         """
110 |
111 |         :param other:
112 |         :return:
113 |         """
114 |         if isinstance(other, dict):
115 |             other = Config(other)
116 |         assert isinstance(other, Config)
117 |         diclist = [("", other)]
118 |         while len(diclist):
119 |             prefix, tdic = diclist[0]
120 |             diclist = diclist[1:]
121 |             for key, value in tdic.items():
122 |                 key = "{}.{}".format(prefix, key) if prefix else key
123 |                 if isinstance(value, dict):
124 |                     diclist.append((key, value))
125 |                     continue
126 |                 try:
127 |                     self.__setattr__(key, value, create_if_not_exist=False)
128 |                 except KeyError:
129 |                     raise KeyError('Non-existent config key: {}'.format(key))
130 |
131 |     def check_and_infer(self):
132 |         """
133 |
134 |         :return:
135 |         """
136 |         if self.DATASET.IMAGE_TYPE in ['rgb', 'gray']:
137 |             self.DATASET.DATA_DIM = 3
138 |         elif self.DATASET.IMAGE_TYPE in ['rgba']:
139 |             self.DATASET.DATA_DIM = 4
140 |         else:
141 |             raise KeyError(
142 |                 'DATASET.IMAGE_TYPE config error, only support `rgb`, `gray` and `rgba`'
143 |             )
144 |         if self.MEAN is not None:
145 |             self.DATASET.PADDING_VALUE = [x * 255.0 for x in self.MEAN]
146 |
147 |         if not self.TRAIN_CROP_SIZE:
148 |             raise ValueError(
149 |                 'TRAIN_CROP_SIZE is empty! Please set a pair of values in format (width, height)'
150 |             )
151 |
152 |         if not self.EVAL_CROP_SIZE:
153 |             raise ValueError(
154 |                 'EVAL_CROP_SIZE is empty! Please set a pair of values in format (width, height)'
155 |             )
156 |
157 |         # Ensure the file lists use UTF-8 encoding
158 |         train_sets = codecs.open(self.DATASET.TRAIN_FILE_LIST, 'r', 'utf-8').readlines()
159 |         val_sets = codecs.open(self.DATASET.VAL_FILE_LIST, 'r', 'utf-8').readlines()
160 |         test_sets = codecs.open(self.DATASET.TEST_FILE_LIST, 'r', 'utf-8').readlines()
161 |         self.DATASET.TRAIN_TOTAL_IMAGES = len(train_sets)
162 |         self.DATASET.VAL_TOTAL_IMAGES = len(val_sets)
163 |         self.DATASET.TEST_TOTAL_IMAGES = len(test_sets)
164 |
165 |         if self.MODEL.MODEL_NAME == 'icnet' and \
166 |                 len(self.MODEL.MULTI_LOSS_WEIGHT) != 3:
167 |             self.MODEL.MULTI_LOSS_WEIGHT = [1.0, 0.4, 0.16]
168 |
169 |     def update_from_list(self, config_list):
170 |         if len(config_list) % 2 != 0:
171 |             raise ValueError(
172 |                 "Command line options config format error! Please check it: {}".
173 |                 format(config_list))
174 |         for key, value in zip(config_list[0::2], config_list[1::2]):
175 |             try:
176 |                 self.__setattr__(key, value, create_if_not_exist=False)
177 |             except KeyError:
178 |                 raise KeyError('Non-existent config key: {}'.format(key))
179 |
180 |     def update_from_file(self, config_file):
181 |         """
182 |
183 |         :param config_file:
184 |         :return:
185 |         """
186 |         with codecs.open(config_file, 'r', 'utf-8') as f:
187 |             dic = yaml.safe_load(f)
188 |         self.update_from_config(dic)
189 |
190 |     def set_immutable(self, immutable):
191 |         """
192 |
193 |         :param immutable:
194 |         :return:
195 |         """
196 |         self.immutable = immutable
197 |         for value in self.values():
198 |             if isinstance(value, Config):
199 |                 value.set_immutable(immutable)
200 |
201 |     def is_immutable(self):
202 |         """
203 |
204 |         :return:
205 |         """
206 |         return self.immutable
207 |
208 |     def dump_to_json_file(self, f_obj):
209 |         """
210 |
211 |         :param f_obj:
212 |         :return:
213 |         """
214 |         origin_dict = dict()
215 |         for key, val in self.items():
216 |             if isinstance(val, Config):
217 |                 origin_dict.update({key: dict(val)})
218 |             elif isinstance(val, dict):
219 |                 origin_dict.update({key: val})
220 |             else:
221 |                 raise TypeError('Not supported type {}'.format(type(val)))
222 |         return json.dump(origin_dict, f_obj)
223 |
224 | r_path = os.path.dirname(os.path.abspath(__file__))
225 | file_path = os.path.join(r_path,'tusimple_lanenet.yaml')
226 | lanenet_cfg = Config(config_path=file_path)
227 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.8.3)
2 | project(lka)
3 |
4 | ## Compile as C++11, supported in ROS Kinetic and newer
5 | # add_compile_options(-std=c++11)
6 |
7 | ## Find catkin macros and libraries
8 | ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
9 | ## is used, also find other catkin packages
10 | find_package(catkin REQUIRED COMPONENTS
11 | roscpp
12 | rospy
13 | std_msgs
14 | )
15 |
16 | ## System dependencies are found with CMake's conventions
17 | # find_package(Boost REQUIRED COMPONENTS system)
18 |
19 |
20 | ## Uncomment this if the package has a setup.py. This macro ensures
21 | ## modules and global scripts declared therein get installed
22 | ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
23 | catkin_python_setup()
24 |
25 | ################################################
26 | ## Declare ROS messages, services and actions ##
27 | ################################################
28 |
29 | ## To declare and build messages, services or actions from within this
30 | ## package, follow these steps:
31 | ## * Let MSG_DEP_SET be the set of packages whose message types you use in
32 | ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
33 | ## * In the file package.xml:
34 | ## * add a build_depend tag for "message_generation"
35 | ## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET
36 | ## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
37 | ## but can be declared for certainty nonetheless:
38 | ## * add a exec_depend tag for "message_runtime"
39 | ## * In this file (CMakeLists.txt):
40 | ## * add "message_generation" and every package in MSG_DEP_SET to
41 | ## find_package(catkin REQUIRED COMPONENTS ...)
42 | ## * add "message_runtime" and every package in MSG_DEP_SET to
43 | ## catkin_package(CATKIN_DEPENDS ...)
44 | ## * uncomment the add_*_files sections below as needed
45 | ## and list every .msg/.srv/.action file to be processed
46 | ## * uncomment the generate_messages entry below
47 | ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
48 |
49 | ## Generate messages in the 'msg' folder
50 | # add_message_files(
51 | # FILES
52 | # Message1.msg
53 | # Message2.msg
54 | # )
55 |
56 | ## Generate services in the 'srv' folder
57 | # add_service_files(
58 | # FILES
59 | # Service1.srv
60 | # Service2.srv
61 | # )
62 |
63 | ## Generate actions in the 'action' folder
64 | # add_action_files(
65 | # FILES
66 | # Action1.action
67 | # Action2.action
68 | # )
69 |
70 | ## Generate added messages and services with any dependencies listed here
71 | # generate_messages(
72 | # DEPENDENCIES
73 | # std_msgs
74 | # )
75 |
76 | ################################################
77 | ## Declare ROS dynamic reconfigure parameters ##
78 | ################################################
79 |
80 | ## To declare and build dynamic reconfigure parameters within this
81 | ## package, follow these steps:
82 | ## * In the file package.xml:
83 | ## * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
84 | ## * In this file (CMakeLists.txt):
85 | ## * add "dynamic_reconfigure" to
86 | ## find_package(catkin REQUIRED COMPONENTS ...)
87 | ## * uncomment the "generate_dynamic_reconfigure_options" section below
88 | ## and list every .cfg file to be processed
89 |
90 | ## Generate dynamic reconfigure parameters in the 'cfg' folder
91 | # generate_dynamic_reconfigure_options(
92 | # cfg/DynReconf1.cfg
93 | # cfg/DynReconf2.cfg
94 | # )
95 |
96 | ###################################
97 | ## catkin specific configuration ##
98 | ###################################
99 | ## The catkin_package macro generates cmake config files for your package
100 | ## Declare things to be passed to dependent projects
101 | ## INCLUDE_DIRS: uncomment this if your package contains header files
102 | ## LIBRARIES: libraries you create in this project that dependent projects also need
103 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need
104 | ## DEPENDS: system dependencies of this project that dependent projects also need
105 | catkin_package(
106 | INCLUDE_DIRS include
107 | # LIBRARIES LKA
108 | CATKIN_DEPENDS roscpp rospy std_msgs
109 | # DEPENDS system_lib
110 | )
111 |
112 | ###########
113 | ## Build ##
114 | ###########
115 |
116 | ## Specify additional locations of header files
117 | ## Your package locations should be listed before other locations
118 | include_directories(
119 | # include
120 | ${catkin_INCLUDE_DIRS}
121 | include
122 | )
123 |
124 | ## Declare a C++ library
125 | # add_library(${PROJECT_NAME}
126 | # src/${PROJECT_NAME}/LKA.cpp
127 | # )
128 |
129 | ## Add cmake target dependencies of the library
130 | ## as an example, code may need to be generated before libraries
131 | ## either from message generation or dynamic reconfigure
132 | # add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
133 |
134 | ## Declare a C++ executable
135 | ## With catkin_make all packages are built within a single CMake context
136 | ## The recommended prefix ensures that target names across packages don't collide
137 | # add_executable(${PROJECT_NAME}_node src/LKA_node.cpp)
138 |
139 | ## Rename C++ executable without prefix
140 | ## The above recommended prefix causes long target names, the following renames the
141 | ## target back to the shorter version for ease of user use
142 | ## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
143 | # set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
144 |
145 | ## Add cmake target dependencies of the executable
146 | ## same as for the library above
147 | # add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
148 |
149 | ## Specify libraries to link a library or executable target against
150 | # target_link_libraries(${PROJECT_NAME}_node
151 | # ${catkin_LIBRARIES}
152 | # )
153 |
154 | #############
155 | ## Install ##
156 | #############
157 |
158 | # all install targets should use catkin DESTINATION variables
159 | # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
160 |
161 | ## Mark executable scripts (Python etc.) for installation
162 | ## in contrast to setup.py, you can choose the destination
163 | catkin_install_python(PROGRAMS
164 | src/LaneDetection/lane_detection.py
165 | src/LaneDetection/lane_detection_publisher.py
166 | src/LaneDetection/read_img.py
167 | DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
168 | )
169 |
170 | ## Mark executables for installation
171 | ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
172 | # install(TARGETS ${PROJECT_NAME}_node
173 | # RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
174 | # )
175 |
176 | ## Mark libraries for installation
177 | ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html
178 | # install(TARGETS ${PROJECT_NAME}
179 | # ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
180 | # LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
181 | # RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION}
182 | # )
183 |
184 | ## Mark cpp header files for installation
185 | # install(DIRECTORY include/${PROJECT_NAME}/
186 | # DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
187 | # FILES_MATCHING PATTERN "*.h"
188 | # PATTERN ".svn" EXCLUDE
189 | # )
190 |
191 | ## Mark other files for installation (e.g. launch and bag files, etc.)
192 | # install(FILES
193 | # # myfile1
194 | # # myfile2
195 | # DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
196 | # )
197 |
198 | #############
199 | ## Testing ##
200 | #############
201 |
202 | ## Add gtest based cpp test target and link libraries
203 | # catkin_add_gtest(${PROJECT_NAME}-test test/test_LKA.cpp)
204 | # if(TARGET ${PROJECT_NAME}-test)
205 | # target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
206 | # endif()
207 |
208 | ## Add folders to be run by python nosetests
209 | # catkin_add_nosetests(test)
210 |
--------------------------------------------------------------------------------
/src/LaneDetection/models/LaneNet/lanenet_back_end.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # @Time : 19-4-24 下午3:54
4 | # @Author : MaybeShewill-CV
5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection
6 | # @File : lanenet_back_end.py
7 | # @IDE: PyCharm
8 | """
9 | LaneNet backend branch which is mainly used for binary and instance segmentation loss calculation
10 | """
11 | import tensorflow as tf
12 |
13 | import lanenet_discriminative_loss
14 | import cnn_basenet
15 |
16 |
17 | class LaneNetBackEnd(cnn_basenet.CNNBaseModel):
18 |     """
19 |     LaneNet backend branch which is mainly used for binary and instance segmentation loss calculation
20 |     """
21 |     def __init__(self, phase, cfg):
22 |         """
23 |         init lanenet backend
24 |         :param phase: train or test
25 |         """
26 |         super(LaneNetBackEnd, self).__init__()
27 |         self._cfg = cfg
28 |         self._phase = phase
29 |         self._is_training = self._is_net_for_training()
30 |
31 |         self._class_nums = self._cfg.DATASET.NUM_CLASSES
32 |         self._embedding_dims = self._cfg.MODEL.EMBEDDING_FEATS_DIMS
33 |         self._binary_loss_type = self._cfg.SOLVER.LOSS_TYPE
34 |
35 |     def _is_net_for_training(self):
36 |         """
37 |         if the net is used for training or not
38 |         :return:
39 |         """
40 |         if isinstance(self._phase, tf.Tensor):
41 |             phase = self._phase
42 |         else:
43 |             phase = tf.constant(self._phase, dtype=tf.string)
44 |
45 |         return tf.equal(phase, tf.constant('train', dtype=tf.string))
46 |
47 |     @classmethod
48 |     def _compute_class_weighted_cross_entropy_loss(cls, onehot_labels, logits, classes_weights):
49 |         """
50 |
51 |         :param onehot_labels:
52 |         :param logits:
53 |         :param classes_weights:
54 |         :return:
55 |         """
56 |         loss_weights = tf.reduce_sum(tf.multiply(onehot_labels, classes_weights), axis=3)
57 |
58 |         loss = tf.losses.softmax_cross_entropy(
59 |             onehot_labels=onehot_labels,
60 |             logits=logits,
61 |             weights=loss_weights
62 |         )
63 |
64 |         return loss
65 |
66 |     @classmethod
67 |     def _multi_category_focal_loss(cls, onehot_labels, logits, classes_weights, gamma=2.0):
68 |         """
69 |
70 |         :param onehot_labels:
71 |         :param logits:
72 |         :param classes_weights:
73 |         :param gamma:
74 |         :return:
75 |         """
76 |         epsilon = 1.e-7
77 |         alpha = tf.multiply(onehot_labels, classes_weights)
78 |         alpha = tf.cast(alpha, tf.float32)
79 |         gamma = float(gamma)
80 |         y_true = tf.cast(onehot_labels, tf.float32)
81 |         y_pred = tf.nn.softmax(logits, dim=-1)
82 |         y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
83 |         y_t = tf.multiply(y_true, y_pred) + tf.multiply(1 - y_true, 1 - y_pred)
84 |         ce = -tf.log(y_t)
85 |         weight = tf.pow(tf.subtract(1., y_t), gamma)
86 |         fl = tf.multiply(tf.multiply(weight, ce), alpha)
87 |         loss = tf.reduce_mean(fl)
88 |
89 |         return loss
90 |
91 |     def compute_loss(self, binary_seg_logits, binary_label,
92 |                      instance_seg_logits, instance_label,
93 |                      name, reuse):
94 |         """
95 |         compute lanenet loss
96 |         :param binary_seg_logits:
97 |         :param binary_label:
98 |         :param instance_seg_logits:
99 |         :param instance_label:
100 |         :param name:
101 |         :param reuse:
102 |         :return:
103 |         """
104 |         with tf.variable_scope(name_or_scope=name, reuse=reuse):
105 |             # calculate class weighted binary seg loss
106 |             with tf.variable_scope(name_or_scope='binary_seg'):
107 |                 binary_label_onehot = tf.one_hot(
108 |                     tf.reshape(
109 |                         tf.cast(binary_label, tf.int32),
110 |                         shape=[binary_label.get_shape().as_list()[0],
111 |                                binary_label.get_shape().as_list()[1],
112 |                                binary_label.get_shape().as_list()[2]]),
113 |                     depth=self._class_nums,
114 |                     axis=-1
115 |                 )
116 |
117 |                 binary_label_plain = tf.reshape(
118 |                     binary_label,
119 |                     shape=[binary_label.get_shape().as_list()[0] *
120 |                            binary_label.get_shape().as_list()[1] *
121 |                            binary_label.get_shape().as_list()[2] *
122 |                            binary_label.get_shape().as_list()[3]])
123 |                 unique_labels, unique_id, counts = tf.unique_with_counts(binary_label_plain)
124 |                 counts = tf.cast(counts, tf.float32)
125 |                 inverse_weights = tf.divide(
126 |                     1.0,
127 |                     tf.log(tf.add(tf.divide(counts, tf.reduce_sum(counts)), tf.constant(1.02)))
128 |                 )
129 |                 if self._binary_loss_type == 'cross_entropy':
130 |                     binary_segmenatation_loss = self._compute_class_weighted_cross_entropy_loss(
131 |                         onehot_labels=binary_label_onehot,
132 |                         logits=binary_seg_logits,
133 |                         classes_weights=inverse_weights
134 |                     )
135 |                 elif self._binary_loss_type == 'focal':
136 |                     binary_segmenatation_loss = self._multi_category_focal_loss(
137 |                         onehot_labels=binary_label_onehot,
138 |                         logits=binary_seg_logits,
139 |                         classes_weights=inverse_weights
140 |                     )
141 |                 else:
142 |                     raise NotImplementedError
143 |
144 |             # calculate class weighted instance seg loss
145 |             with tf.variable_scope(name_or_scope='instance_seg'):
146 |
147 |                 pix_bn = self.layerbn(
148 |                     inputdata=instance_seg_logits, is_training=self._is_training, name='pix_bn')
149 |                 pix_relu = self.relu(inputdata=pix_bn, name='pix_relu')
150 |                 pix_embedding = self.conv2d(
151 |                     inputdata=pix_relu,
152 |                     out_channel=self._embedding_dims,
153 |                     kernel_size=1,
154 |                     use_bias=False,
155 |                     name='pix_embedding_conv'
156 |                 )
157 |                 pix_image_shape = (pix_embedding.get_shape().as_list()[1], pix_embedding.get_shape().as_list()[2])
158 |                 instance_segmentation_loss, l_var, l_dist, l_reg = \
159 |                     lanenet_discriminative_loss.discriminative_loss(
160 |                         pix_embedding, instance_label, self._embedding_dims,
161 |                         pix_image_shape, 0.5, 3.0, 1.0, 1.0, 0.001
162 |                     )
163 |
164 |             l2_reg_loss = tf.constant(0.0, tf.float32)
165 |             for vv in tf.trainable_variables():
166 |                 if 'bn' in vv.name or 'gn' in vv.name:
167 |                     continue
168 |                 else:
169 |                     l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
170 |             l2_reg_loss *= 0.001
171 |             total_loss = binary_segmenatation_loss + instance_segmentation_loss + l2_reg_loss
172 |
173 |             ret = {
174 |                 'total_loss': total_loss,
175 |                 'binary_seg_logits': binary_seg_logits,
176 |                 'instance_seg_logits': pix_embedding,
177 |                 'binary_seg_loss': binary_segmenatation_loss,
178 |                 'discriminative_loss': instance_segmentation_loss
179 |             }
180 |
181 |         return ret
182 |
183 |     def inference(self, binary_seg_logits, instance_seg_logits, name, reuse):
184 |         """
185 |
186 |         :param binary_seg_logits:
187 |         :param instance_seg_logits:
188 |         :param name:
189 |         :param reuse:
190 |         :return:
191 |         """
192 |         with tf.variable_scope(name_or_scope=name, reuse=reuse):
193 |
194 |             with tf.variable_scope(name_or_scope='binary_seg'):
195 |                 binary_seg_score = tf.nn.softmax(logits=binary_seg_logits)
196 |                 binary_seg_prediction = tf.argmax(binary_seg_score, axis=-1)
197 |
198 |             with tf.variable_scope(name_or_scope='instance_seg'):
199 |
200 |                 pix_bn = self.layerbn(
201 |                     inputdata=instance_seg_logits, is_training=self._is_training, name='pix_bn')
202 |                 pix_relu = self.relu(inputdata=pix_bn, name='pix_relu')
203 |                 instance_seg_prediction = self.conv2d(
204 |                     inputdata=pix_relu,
205 |                     out_channel=self._embedding_dims,
206 |                     kernel_size=1,
207 |                     use_bias=False,
208 |                     name='pix_embedding_conv'
209 |                 )
210 |
211 |         return binary_seg_prediction, instance_seg_prediction
212 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/src/LaneDetection/models/LaneNet/vgg16_based_fcn.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # @Time : 19-4-24 6:42 PM
4 | # @Author : MaybeShewill-CV
5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection
6 | # @File : vgg16_based_fcn.py
7 | # @IDE: PyCharm
8 | """
9 | Implement a VGG16-based FCN net for semantic segmentation
10 | """
11 | import collections
12 |
13 | import tensorflow as tf
14 |
15 | import cnn_basenet
16 | import parse_config_utils
17 |
18 |
19 | class VGG16FCN(cnn_basenet.CNNBaseModel):
20 | """
21 | VGG16-based FCN net for semantic segmentation
22 | """
23 | def __init__(self, phase, cfg):
24 | """
25 | Initialize the net with a phase flag ('train'/'test') and a parsed config.
26 | """
27 | super(VGG16FCN, self).__init__()
28 | self._cfg = cfg
29 | self._phase = phase
30 | self._is_training = self._is_net_for_training()
31 | self._net_intermediate_results = collections.OrderedDict()
32 | self._class_nums = self._cfg.DATASET.NUM_CLASSES
33 |
34 | def _is_net_for_training(self):
35 | """
36 | if the net is used for training or not
37 | :return:
38 | """
39 | if isinstance(self._phase, tf.Tensor):
40 | phase = self._phase
41 | else:
42 | phase = tf.constant(self._phase, dtype=tf.string)
43 |
44 | return tf.equal(phase, tf.constant('train', dtype=tf.string))
45 |
46 | def _vgg16_conv_stage(self, input_tensor, k_size, out_dims, name,
47 | stride=1, pad='SAME', need_layer_norm=True):
48 | """
49 | stack conv and activation in vgg16
50 | :param input_tensor:
51 | :param k_size:
52 | :param out_dims:
53 | :param name:
54 | :param stride:
55 | :param pad:
56 | :param need_layer_norm:
57 | :return:
58 | """
59 | with tf.variable_scope(name):
60 | conv = self.conv2d(
61 | inputdata=input_tensor, out_channel=out_dims,
62 | kernel_size=k_size, stride=stride,
63 | use_bias=False, padding=pad, name='conv'
64 | )
65 |
66 | if need_layer_norm:
67 | bn = self.layerbn(inputdata=conv, is_training=self._is_training, name='bn')
68 |
69 | relu = self.relu(inputdata=bn, name='relu')
70 | else:
71 | relu = self.relu(inputdata=conv, name='relu')
72 |
73 | return relu
74 |
75 | def _decode_block(self, input_tensor, previous_feats_tensor,
76 | out_channels_nums, name, kernel_size=4,
77 | stride=2, use_bias=False,
78 | previous_kernel_size=4, need_activate=True):
79 | """
80 | Deconv-upsample input_tensor and fuse it with the matching encoder feature map.
81 | :param input_tensor:
82 | :param previous_feats_tensor:
83 | :param out_channels_nums:
84 | :param kernel_size:
85 | :param previous_kernel_size:
86 | :param use_bias:
87 | :param stride:
88 | :param name:
89 | :return:
90 | """
91 | with tf.variable_scope(name_or_scope=name):
92 |
93 | deconv_weights_stddev = tf.sqrt(
94 | tf.divide(tf.constant(2.0, tf.float32),
95 | tf.multiply(tf.cast(previous_kernel_size * previous_kernel_size, tf.float32),
96 | tf.cast(tf.shape(input_tensor)[3], tf.float32)))
97 | )
98 | deconv_weights_init = tf.truncated_normal_initializer(
99 | mean=0.0, stddev=deconv_weights_stddev)
100 |
101 | deconv = self.deconv2d(
102 | inputdata=input_tensor, out_channel=out_channels_nums, kernel_size=kernel_size,
103 | stride=stride, use_bias=use_bias, w_init=deconv_weights_init,
104 | name='deconv'
105 | )
106 |
107 | deconv = self.layerbn(inputdata=deconv, is_training=self._is_training, name='deconv_bn')
108 |
109 | deconv = self.relu(inputdata=deconv, name='deconv_relu')
110 |
111 | fuse_feats = tf.add(
112 | previous_feats_tensor, deconv, name='fuse_feats'
113 | )
114 |
115 | if need_activate:
116 |
117 | fuse_feats = self.layerbn(
118 | inputdata=fuse_feats, is_training=self._is_training, name='fuse_gn'
119 | )
120 |
121 | fuse_feats = self.relu(inputdata=fuse_feats, name='fuse_relu')
122 |
123 | return fuse_feats
124 |
125 | def _vgg16_fcn_encode(self, input_tensor, name):
126 | """
127 | Shared VGG16 encoder with separate stage-5 branches for binary and instance segmentation.
128 | :param input_tensor:
129 | :param name:
130 | :return:
131 | """
132 | with tf.variable_scope(name_or_scope=name):
133 | # encode stage 1
134 | conv_1_1 = self._vgg16_conv_stage(
135 | input_tensor=input_tensor, k_size=3,
136 | out_dims=64, name='conv1_1',
137 | need_layer_norm=True
138 | )
139 | conv_1_2 = self._vgg16_conv_stage(
140 | input_tensor=conv_1_1, k_size=3,
141 | out_dims=64, name='conv1_2',
142 | need_layer_norm=True
143 | )
144 | self._net_intermediate_results['encode_stage_1_share'] = {
145 | 'data': conv_1_2,
146 | 'shape': conv_1_2.get_shape().as_list()
147 | }
148 |
149 | # encode stage 2
150 | pool1 = self.maxpooling(
151 | inputdata=conv_1_2, kernel_size=2,
152 | stride=2, name='pool1'
153 | )
154 | conv_2_1 = self._vgg16_conv_stage(
155 | input_tensor=pool1, k_size=3,
156 | out_dims=128, name='conv2_1',
157 | need_layer_norm=True
158 | )
159 | conv_2_2 = self._vgg16_conv_stage(
160 | input_tensor=conv_2_1, k_size=3,
161 | out_dims=128, name='conv2_2',
162 | need_layer_norm=True
163 | )
164 | self._net_intermediate_results['encode_stage_2_share'] = {
165 | 'data': conv_2_2,
166 | 'shape': conv_2_2.get_shape().as_list()
167 | }
168 |
169 | # encode stage 3
170 | pool2 = self.maxpooling(
171 | inputdata=conv_2_2, kernel_size=2,
172 | stride=2, name='pool2'
173 | )
174 | conv_3_1 = self._vgg16_conv_stage(
175 | input_tensor=pool2, k_size=3,
176 | out_dims=256, name='conv3_1',
177 | need_layer_norm=True
178 | )
179 | conv_3_2 = self._vgg16_conv_stage(
180 | input_tensor=conv_3_1, k_size=3,
181 | out_dims=256, name='conv3_2',
182 | need_layer_norm=True
183 | )
184 | conv_3_3 = self._vgg16_conv_stage(
185 | input_tensor=conv_3_2, k_size=3,
186 | out_dims=256, name='conv3_3',
187 | need_layer_norm=True
188 | )
189 | self._net_intermediate_results['encode_stage_3_share'] = {
190 | 'data': conv_3_3,
191 | 'shape': conv_3_3.get_shape().as_list()
192 | }
193 |
194 | # encode stage 4
195 | pool3 = self.maxpooling(
196 | inputdata=conv_3_3, kernel_size=2,
197 | stride=2, name='pool3'
198 | )
199 | conv_4_1 = self._vgg16_conv_stage(
200 | input_tensor=pool3, k_size=3,
201 | out_dims=512, name='conv4_1',
202 | need_layer_norm=True
203 | )
204 | conv_4_2 = self._vgg16_conv_stage(
205 | input_tensor=conv_4_1, k_size=3,
206 | out_dims=512, name='conv4_2',
207 | need_layer_norm=True
208 | )
209 | conv_4_3 = self._vgg16_conv_stage(
210 | input_tensor=conv_4_2, k_size=3,
211 | out_dims=512, name='conv4_3',
212 | need_layer_norm=True
213 | )
214 | self._net_intermediate_results['encode_stage_4_share'] = {
215 | 'data': conv_4_3,
216 | 'shape': conv_4_3.get_shape().as_list()
217 | }
218 |
219 | # encode stage 5 for binary segmentation
220 | pool4 = self.maxpooling(
221 | inputdata=conv_4_3, kernel_size=2,
222 | stride=2, name='pool4'
223 | )
224 | conv_5_1_binary = self._vgg16_conv_stage(
225 | input_tensor=pool4, k_size=3,
226 | out_dims=512, name='conv5_1_binary',
227 | need_layer_norm=True
228 | )
229 | conv_5_2_binary = self._vgg16_conv_stage(
230 | input_tensor=conv_5_1_binary, k_size=3,
231 | out_dims=512, name='conv5_2_binary',
232 | need_layer_norm=True
233 | )
234 | conv_5_3_binary = self._vgg16_conv_stage(
235 | input_tensor=conv_5_2_binary, k_size=3,
236 | out_dims=512, name='conv5_3_binary',
237 | need_layer_norm=True
238 | )
239 | self._net_intermediate_results['encode_stage_5_binary'] = {
240 | 'data': conv_5_3_binary,
241 | 'shape': conv_5_3_binary.get_shape().as_list()
242 | }
243 |
244 | # encode stage 5 for instance segmentation
245 | conv_5_1_instance = self._vgg16_conv_stage(
246 | input_tensor=pool4, k_size=3,
247 | out_dims=512, name='conv5_1_instance',
248 | need_layer_norm=True
249 | )
250 | conv_5_2_instance = self._vgg16_conv_stage(
251 | input_tensor=conv_5_1_instance, k_size=3,
252 | out_dims=512, name='conv5_2_instance',
253 | need_layer_norm=True
254 | )
255 | conv_5_3_instance = self._vgg16_conv_stage(
256 | input_tensor=conv_5_2_instance, k_size=3,
257 | out_dims=512, name='conv5_3_instance',
258 | need_layer_norm=True
259 | )
260 | self._net_intermediate_results['encode_stage_5_instance'] = {
261 | 'data': conv_5_3_instance,
262 | 'shape': conv_5_3_instance.get_shape().as_list()
263 | }
264 |
265 | return
266 |
267 | def _vgg16_fcn_decode(self, name):
268 | """
269 | FCN decoder: progressively upsample and fuse encoder features for both branches.
270 | :return:
271 | """
272 | with tf.variable_scope(name):
273 |
274 | # decode part for binary segmentation
275 | with tf.variable_scope(name_or_scope='binary_seg_decode'):
276 |
277 | decode_stage_5_binary = self._net_intermediate_results['encode_stage_5_binary']['data']
278 |
279 | decode_stage_4_fuse = self._decode_block(
280 | input_tensor=decode_stage_5_binary,
281 | previous_feats_tensor=self._net_intermediate_results['encode_stage_4_share']['data'],
282 | name='decode_stage_4_fuse', out_channels_nums=512, previous_kernel_size=3
283 | )
284 | decode_stage_3_fuse = self._decode_block(
285 | input_tensor=decode_stage_4_fuse,
286 | previous_feats_tensor=self._net_intermediate_results['encode_stage_3_share']['data'],
287 | name='decode_stage_3_fuse', out_channels_nums=256
288 | )
289 | decode_stage_2_fuse = self._decode_block(
290 | input_tensor=decode_stage_3_fuse,
291 | previous_feats_tensor=self._net_intermediate_results['encode_stage_2_share']['data'],
292 | name='decode_stage_2_fuse', out_channels_nums=128
293 | )
294 | decode_stage_1_fuse = self._decode_block(
295 | input_tensor=decode_stage_2_fuse,
296 | previous_feats_tensor=self._net_intermediate_results['encode_stage_1_share']['data'],
297 | name='decode_stage_1_fuse', out_channels_nums=64
298 | )
299 | binary_final_logits_conv_weights_stddev = tf.sqrt(
300 | tf.divide(tf.constant(2.0, tf.float32),
301 | tf.multiply(4.0 * 4.0,
302 | tf.cast(tf.shape(decode_stage_1_fuse)[3], tf.float32)))
303 | )
304 | binary_final_logits_conv_weights_init = tf.truncated_normal_initializer(
305 | mean=0.0, stddev=binary_final_logits_conv_weights_stddev)
306 |
307 | binary_final_logits = self.conv2d(
308 | inputdata=decode_stage_1_fuse,
309 | out_channel=self._class_nums,
310 | kernel_size=1, use_bias=False,
311 | w_init=binary_final_logits_conv_weights_init,
312 | name='binary_final_logits'
313 | )
314 |
315 | self._net_intermediate_results['binary_segment_logits'] = {
316 | 'data': binary_final_logits,
317 | 'shape': binary_final_logits.get_shape().as_list()
318 | }
319 |
320 | with tf.variable_scope(name_or_scope='instance_seg_decode'):
321 |
322 | decode_stage_5_instance = self._net_intermediate_results['encode_stage_5_instance']['data']
323 |
324 | decode_stage_4_fuse = self._decode_block(
325 | input_tensor=decode_stage_5_instance,
326 | previous_feats_tensor=self._net_intermediate_results['encode_stage_4_share']['data'],
327 | name='decode_stage_4_fuse', out_channels_nums=512, previous_kernel_size=3)
328 |
329 | decode_stage_3_fuse = self._decode_block(
330 | input_tensor=decode_stage_4_fuse,
331 | previous_feats_tensor=self._net_intermediate_results['encode_stage_3_share']['data'],
332 | name='decode_stage_3_fuse', out_channels_nums=256)
333 |
334 | decode_stage_2_fuse = self._decode_block(
335 | input_tensor=decode_stage_3_fuse,
336 | previous_feats_tensor=self._net_intermediate_results['encode_stage_2_share']['data'],
337 | name='decode_stage_2_fuse', out_channels_nums=128)
338 |
339 | decode_stage_1_fuse = self._decode_block(
340 | input_tensor=decode_stage_2_fuse,
341 | previous_feats_tensor=self._net_intermediate_results['encode_stage_1_share']['data'],
342 | name='decode_stage_1_fuse', out_channels_nums=64, need_activate=False)
343 |
344 | self._net_intermediate_results['instance_segment_logits'] = {
345 | 'data': decode_stage_1_fuse,
346 | 'shape': decode_stage_1_fuse.get_shape().as_list()
347 | }
348 |
349 | def build_model(self, input_tensor, name, reuse=False):
350 | """
351 | Build the full encoder-decoder graph and return the intermediate results dict.
352 | :param input_tensor:
353 | :param name:
354 | :param reuse:
355 | :return:
356 | """
357 | with tf.variable_scope(name_or_scope=name, reuse=reuse):
358 | # vgg16 fcn encode part
359 | self._vgg16_fcn_encode(input_tensor=input_tensor, name='vgg16_encode_module')
360 | # vgg16 fcn decode part
361 | self._vgg16_fcn_decode(name='vgg16_decode_module')
362 |
363 | return self._net_intermediate_results
364 |
365 |
366 | if __name__ == '__main__':
367 | """
368 | test code
369 | """
370 | test_in_tensor = tf.placeholder(dtype=tf.float32, shape=[1, 256, 512, 3], name='input')
371 | model = VGG16FCN(phase='train', cfg=parse_config_utils.lanenet_cfg)
372 | ret = model.build_model(test_in_tensor, name='vgg16fcn')
373 | for layer_name, layer_info in ret.items():
374 | print('layer name: {:s} shape: {}'.format(layer_name, layer_info['shape']))
375 |
--------------------------------------------------------------------------------
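A quick note on the weight initializers above: `_decode_block` and the final binary-logits conv both draw weights from a truncated normal whose stddev follows the He/MSRA rule, sigma = sqrt(2 / (k^2 * C_in)), where k is the kernel size of the preceding layer and C_in its channel count. Below is a minimal standalone sketch of that arithmetic (plain Python, no TensorFlow; the k=4, C_in=512 pair mirrors the decoder's default `previous_kernel_size` feeding a 512-channel stage, chosen here only for illustration):

import math

def he_stddev(kernel_size, in_channels):
    # He/MSRA initialization: sigma = sqrt(2 / (k * k * C_in))
    return math.sqrt(2.0 / (kernel_size * kernel_size * in_channels))

print(he_stddev(4, 512))  # 0.015625, the stddev a 4x4 kernel over 512 input channels would get
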
/src/LaneDetection/models/LaneNet/lanenet_postprocess.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # @Time : 18-5-30 10:04 AM
4 | # @Author : MaybeShewill-CV
5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection
6 | # @File : lanenet_postprocess.py
7 | # @IDE: PyCharm Community Edition
8 | """
9 | LaneNet model post process
10 | """
11 | import os.path as ops
12 | import math
13 |
14 | import cv2
15 | import glog as log
16 | import numpy as np
17 | from sklearn.cluster import DBSCAN
18 | from sklearn.preprocessing import StandardScaler
19 |
20 |
21 | def _morphological_process(image, kernel_size=5):
22 | """
23 | morphological processing to fill holes in the binary segmentation result
24 | :param image:
25 | :param kernel_size:
26 | :return:
27 | """
28 | if len(image.shape) == 3:
29 | raise ValueError('Binary segmentation result image should be a single channel image')
30 |
31 | if image.dtype != np.uint8:
32 | image = np.array(image, np.uint8)
33 |
34 | kernel = cv2.getStructuringElement(shape=cv2.MORPH_ELLIPSE, ksize=(kernel_size, kernel_size))
35 |
36 | # closing operation fills holes
37 | closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=1)
38 |
39 | return closing
40 |
41 |
42 | def _connect_components_analysis(image):
43 | """
44 | connected components analysis to remove small components
45 | :param image:
46 | :return:
47 | """
48 | if len(image.shape) == 3:
49 | gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
50 | else:
51 | gray_image = image
52 |
53 | return cv2.connectedComponentsWithStats(gray_image, connectivity=8, ltype=cv2.CV_32S)
54 |
55 |
56 | class _LaneFeat(object):
57 | """
58 |
59 | """
60 | def __init__(self, feat, coord, class_id=-1):
61 | """
62 | lane feat object
63 | :param feat: lane embedding feats [feature_1, feature_2, ...]
64 | :param coord: lane coordinates [x, y]
65 | :param class_id: lane class id
66 | """
67 | self._feat = feat
68 | self._coord = coord
69 | self._class_id = class_id
70 |
71 | @property
72 | def feat(self):
73 | """
74 |
75 | :return:
76 | """
77 | return self._feat
78 |
79 | @feat.setter
80 | def feat(self, value):
81 | """
82 |
83 | :param value:
84 | :return:
85 | """
86 | if not isinstance(value, np.ndarray):
87 | value = np.array(value, dtype=np.float64)
88 |
89 | if value.dtype != np.float64:
90 | value = np.array(value, dtype=np.float64)
91 |
92 | self._feat = value
93 |
94 | @property
95 | def coord(self):
96 | """
97 |
98 | :return:
99 | """
100 | return self._coord
101 |
102 | @coord.setter
103 | def coord(self, value):
104 | """
105 |
106 | :param value:
107 | :return:
108 | """
109 | if not isinstance(value, np.ndarray):
110 | value = np.array(value)
111 |
112 | if value.dtype != np.int32:
113 | value = np.array(value, dtype=np.int32)
114 |
115 | self._coord = value
116 |
117 | @property
118 | def class_id(self):
119 | """
120 |
121 | :return:
122 | """
123 | return self._class_id
124 |
125 | @class_id.setter
126 | def class_id(self, value):
127 | """
128 |
129 | :param value:
130 | :return:
131 | """
132 | if not isinstance(value, (int, np.integer)):
133 | raise ValueError('Class id must be an integer')
134 |
135 | self._class_id = value
136 |
137 |
138 | class _LaneNetCluster(object):
139 | """
140 | Instance segmentation result cluster
141 | """
142 |
143 | def __init__(self, cfg):
144 | """
145 | :param cfg: parsed postprocess config
146 | """
147 | self._color_map = [np.array([255, 0, 0]),
148 | np.array([0, 255, 0]),
149 | np.array([0, 0, 255]),
150 | np.array([125, 125, 0]),
151 | np.array([0, 125, 125]),
152 | np.array([125, 0, 125]),
153 | np.array([50, 100, 50]),
154 | np.array([100, 50, 100])]
155 | self._cfg = cfg
156 |
157 | def _embedding_feats_dbscan_cluster(self, embedding_image_feats):
158 | """
159 | dbscan cluster
160 | :param embedding_image_feats:
161 | :return:
162 | """
163 | db = DBSCAN(eps=self._cfg.POSTPROCESS.DBSCAN_EPS, min_samples=self._cfg.POSTPROCESS.DBSCAN_MIN_SAMPLES)
164 | try:
165 | features = StandardScaler().fit_transform(embedding_image_feats)
166 | db.fit(features)
167 | except Exception as err:
168 | log.error(err)
169 | ret = {
170 | 'origin_features': None,
171 | 'cluster_nums': 0,
172 | 'db_labels': None,
173 | 'unique_labels': None,
174 | 'cluster_center': None
175 | }
176 | return ret
177 | db_labels = db.labels_
178 | unique_labels = np.unique(db_labels)
179 |
180 | num_clusters = len(unique_labels)
181 | cluster_centers = db.components_
182 |
183 | ret = {
184 | 'origin_features': features,
185 | 'cluster_nums': num_clusters,
186 | 'db_labels': db_labels,
187 | 'unique_labels': unique_labels,
188 | 'cluster_center': cluster_centers
189 | }
190 |
191 | return ret
192 |
193 | @staticmethod
194 | def _get_lane_embedding_feats(binary_seg_ret, instance_seg_ret):
195 | """
196 | get lane embedding features according to the binary seg result
197 | :param binary_seg_ret:
198 | :param instance_seg_ret:
199 | :return:
200 | """
201 | idx = np.where(binary_seg_ret == 255)
202 | lane_embedding_feats = instance_seg_ret[idx]
203 | # idx_scale = np.vstack((idx[0] / 256.0, idx[1] / 512.0)).transpose()
204 | # lane_embedding_feats = np.hstack((lane_embedding_feats, idx_scale))
205 | lane_coordinate = np.vstack((idx[1], idx[0])).transpose()
206 |
207 | assert lane_embedding_feats.shape[0] == lane_coordinate.shape[0]
208 |
209 | ret = {
210 | 'lane_embedding_feats': lane_embedding_feats,
211 | 'lane_coordinates': lane_coordinate
212 | }
213 |
214 | return ret
215 |
216 | def apply_lane_feats_cluster(self, binary_seg_result, instance_seg_result):
217 | """
218 |
219 | :param binary_seg_result:
220 | :param instance_seg_result:
221 | :return:
222 | """
223 | # get embedding feats and coords
224 | get_lane_embedding_feats_result = self._get_lane_embedding_feats(
225 | binary_seg_ret=binary_seg_result,
226 | instance_seg_ret=instance_seg_result
227 | )
228 |
229 | # dbscan cluster
230 | dbscan_cluster_result = self._embedding_feats_dbscan_cluster(
231 | embedding_image_feats=get_lane_embedding_feats_result['lane_embedding_feats']
232 | )
233 |
234 | mask = np.zeros(shape=[binary_seg_result.shape[0], binary_seg_result.shape[1], 3], dtype=np.uint8)
235 | db_labels = dbscan_cluster_result['db_labels']
236 | unique_labels = dbscan_cluster_result['unique_labels']
237 | coord = get_lane_embedding_feats_result['lane_coordinates']
238 |
239 | if db_labels is None:
240 | return None, None
241 |
242 | lane_coords = []
243 |
244 | for index, label in enumerate(unique_labels.tolist()):
245 | if label == -1:
246 | continue
247 | idx = np.where(db_labels == label)
248 | pix_coord_idx = tuple((coord[idx][:, 1], coord[idx][:, 0]))
249 | mask[pix_coord_idx] = self._color_map[index]
250 | lane_coords.append(coord[idx])
251 |
252 | return mask, lane_coords
253 |
254 |
255 | class LaneNetPostProcessor(object):
256 | """
257 | lanenet post process for lane generation
258 | """
259 | def __init__(self, cfg, ipm_remap_file_path='./data/tusimple_ipm_remap.yml'):
260 | """
261 |
262 | :param ipm_remap_file_path: path to the ipm remap matrix file
263 | """
264 | assert ops.exists(ipm_remap_file_path), '{:s} does not exist'.format(ipm_remap_file_path)
265 |
266 | self._cfg = cfg
267 | self._cluster = _LaneNetCluster(cfg=cfg)
268 | self._ipm_remap_file_path = ipm_remap_file_path
269 |
270 | remap_file_load_ret = self._load_remap_matrix()
271 | self._remap_to_ipm_x = remap_file_load_ret['remap_to_ipm_x']
272 | self._remap_to_ipm_y = remap_file_load_ret['remap_to_ipm_y']
273 |
274 | self._color_map = [np.array([255, 0, 0]),
275 | np.array([0, 255, 0]),
276 | np.array([0, 0, 255]),
277 | np.array([125, 125, 0]),
278 | np.array([0, 125, 125]),
279 | np.array([125, 0, 125]),
280 | np.array([50, 100, 50]),
281 | np.array([100, 50, 100])]
282 |
283 | def _load_remap_matrix(self):
284 | """
285 |
286 | :return:
287 | """
288 | fs = cv2.FileStorage(self._ipm_remap_file_path, cv2.FILE_STORAGE_READ)
289 |
290 | remap_to_ipm_x = fs.getNode('remap_ipm_x').mat()
291 | remap_to_ipm_y = fs.getNode('remap_ipm_y').mat()
292 |
293 | ret = {
294 | 'remap_to_ipm_x': remap_to_ipm_x,
295 | 'remap_to_ipm_y': remap_to_ipm_y,
296 | }
297 |
298 | fs.release()
299 |
300 | return ret
301 |
302 | def postprocess(self, binary_seg_result, instance_seg_result=None,
303 | min_area_threshold=100, source_image=None,
304 | data_source='tusimple'):
305 | """
306 |
307 | :param binary_seg_result:
308 | :param instance_seg_result:
309 | :param min_area_threshold:
310 | :param source_image:
311 | :param data_source:
312 | :return:
313 | """
314 | # convert binary_seg_result
315 | binary_seg_result = np.array(binary_seg_result * 255, dtype=np.uint8)
316 |
317 | # apply image morphology operation to fill in the holes and remove small areas
318 | morphological_ret = _morphological_process(binary_seg_result, kernel_size=5)
319 |
320 | connect_components_analysis_ret = _connect_components_analysis(image=morphological_ret)
321 |
322 | labels = connect_components_analysis_ret[1]
323 | stats = connect_components_analysis_ret[2]
324 | for index, stat in enumerate(stats):
325 | if stat[4] <= min_area_threshold:
326 | idx = np.where(labels == index)
327 | morphological_ret[idx] = 0
328 |
329 | # apply embedding features cluster
330 | mask_image, lane_coords = self._cluster.apply_lane_feats_cluster(
331 | binary_seg_result=morphological_ret,
332 | instance_seg_result=instance_seg_result
333 | )
334 |
335 | if mask_image is None:
336 | return {
337 | 'mask_image': None,
338 | 'fit_params': None,
339 | 'source_image': None,
340 | }
341 |
342 | # lane line fit
343 | fit_params = []
344 | src_lane_pts = []  # lane pts for every single lane
345 | for lane_index, coords in enumerate(lane_coords):
346 | if data_source == 'tusimple':
347 | tmp_mask = np.zeros(shape=(720, 1280), dtype=np.uint8)
348 | tmp_mask[tuple((np.int_(coords[:, 1] * 720 / 256), np.int_(coords[:, 0] * 1280 / 512)))] = 255
349 | else:
350 | raise ValueError('Wrong data source, only tusimple is supported for now')
351 | tmp_ipm_mask = cv2.remap(
352 | tmp_mask,
353 | self._remap_to_ipm_x,
354 | self._remap_to_ipm_y,
355 | interpolation=cv2.INTER_NEAREST
356 | )
357 | nonzero_y = np.array(tmp_ipm_mask.nonzero()[0])
358 | nonzero_x = np.array(tmp_ipm_mask.nonzero()[1])
359 |
360 | fit_param = np.polyfit(nonzero_y, nonzero_x, 2)
361 | fit_params.append(fit_param)
362 |
363 | [ipm_image_height, ipm_image_width] = tmp_ipm_mask.shape
364 | plot_y = np.linspace(10, ipm_image_height, ipm_image_height - 10)
365 | fit_x = fit_param[0] * plot_y ** 2 + fit_param[1] * plot_y + fit_param[2]
366 | # fit_x = fit_param[0] * plot_y ** 3 + fit_param[1] * plot_y ** 2 + fit_param[2] * plot_y + fit_param[3]
367 |
368 | lane_pts = []
369 | for index in range(0, plot_y.shape[0], 5):
370 | src_x = self._remap_to_ipm_x[
371 | int(plot_y[index]), int(np.clip(fit_x[index], 0, ipm_image_width - 1))]
372 | if src_x <= 0:
373 | continue
374 | src_y = self._remap_to_ipm_y[
375 | int(plot_y[index]), int(np.clip(fit_x[index], 0, ipm_image_width - 1))]
376 | src_y = src_y if src_y > 0 else 0
377 |
378 | lane_pts.append([src_x, src_y])
379 |
380 | src_lane_pts.append(lane_pts)
381 |
382 | # tusimple test data samples points along the y axis every 10 pixels
383 | source_image_width = source_image.shape[1]
384 | for index, single_lane_pts in enumerate(src_lane_pts):
385 | single_lane_pt_x = np.array(single_lane_pts, dtype=np.float32)[:, 0]
386 | single_lane_pt_y = np.array(single_lane_pts, dtype=np.float32)[:, 1]
387 | if data_source == 'tusimple':
388 | start_plot_y = 240
389 | end_plot_y = 720
390 | else:
391 | raise ValueError('Wrong data source, only tusimple is supported for now')
392 | step = int(math.floor((end_plot_y - start_plot_y) / 10))
393 | for plot_y in np.linspace(start_plot_y, end_plot_y, step):
394 | diff = single_lane_pt_y - plot_y
395 | fake_diff_bigger_than_zero = diff.copy()
396 | fake_diff_smaller_than_zero = diff.copy()
397 | fake_diff_bigger_than_zero[np.where(diff <= 0)] = float('inf')
398 | fake_diff_smaller_than_zero[np.where(diff > 0)] = float('-inf')
399 | idx_low = np.argmax(fake_diff_smaller_than_zero)
400 | idx_high = np.argmin(fake_diff_bigger_than_zero)
401 |
402 | previous_src_pt_x = single_lane_pt_x[idx_low]
403 | previous_src_pt_y = single_lane_pt_y[idx_low]
404 | last_src_pt_x = single_lane_pt_x[idx_high]
405 | last_src_pt_y = single_lane_pt_y[idx_high]
406 |
407 | if previous_src_pt_y < start_plot_y or last_src_pt_y < start_plot_y or \
408 | fake_diff_smaller_than_zero[idx_low] == float('-inf') or \
409 | fake_diff_bigger_than_zero[idx_high] == float('inf'):
410 | continue
411 |
412 | interpolation_src_pt_x = (abs(previous_src_pt_y - plot_y) * previous_src_pt_x +
413 | abs(last_src_pt_y - plot_y) * last_src_pt_x) / \
414 | (abs(previous_src_pt_y - plot_y) + abs(last_src_pt_y - plot_y))
415 | interpolation_src_pt_y = (abs(previous_src_pt_y - plot_y) * previous_src_pt_y +
416 | abs(last_src_pt_y - plot_y) * last_src_pt_y) / \
417 | (abs(previous_src_pt_y - plot_y) + abs(last_src_pt_y - plot_y))
418 |
419 | if interpolation_src_pt_x > source_image_width or interpolation_src_pt_x < 10:
420 | continue
421 |
422 | lane_color = self._color_map[index].tolist()
423 | cv2.circle(source_image, (int(interpolation_src_pt_x),
424 | int(interpolation_src_pt_y)), 5, lane_color, -1)
425 | ret = {
426 | 'mask_image': mask_image,
427 | 'fit_params': fit_params,
428 | 'source_image': source_image,
429 | }
430 |
431 | return ret
432 |
--------------------------------------------------------------------------------
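To make the clustering stage above concrete: `_LaneNetCluster` gathers the embedding vector at every binary-mask pixel, standardizes the vectors, runs DBSCAN, and then groups pixel coordinates by cluster label, one cluster per lane. The sketch below reproduces just that step on synthetic two-lane embeddings; the cluster centers, eps, and min_samples values are illustrative placeholders, not the project's tuned POSTPROCESS config:

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
# two fake "lanes": 4-D pixel embeddings drawn around two separated centers
feats = np.vstack([
    rng.normal(loc=0.0, scale=0.05, size=(200, 4)),
    rng.normal(loc=1.0, scale=0.05, size=(200, 4)),
])
coords = rng.integers(0, 512, size=(400, 2))  # the matching (x, y) pixel coords

db = DBSCAN(eps=0.35, min_samples=20).fit(StandardScaler().fit_transform(feats))
for label in np.unique(db.labels_):
    if label == -1:  # -1 is DBSCAN noise, skipped exactly as in apply_lane_feats_cluster
        continue
    lane_coords = coords[db.labels_ == label]
    print('lane', label, 'has', lane_coords.shape[0], 'pixels')
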
/src/LaneDetection/models/LaneNet/cnn_basenet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time : 17-9-18 3:59 PM
4 | # @Author : MaybeShewill-CV
5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection
6 | # @File : cnn_basenet.py
7 | # @IDE: PyCharm Community Edition
8 | """
9 | Base convolutional neural network class implementing some commonly used CNN functions
10 | """
11 | import tensorflow as tf
12 | import numpy as np
13 |
14 |
15 | class CNNBaseModel(object):
16 | """
17 | Base model for other specific cnn models
18 | """
19 |
20 | def __init__(self):
21 | pass
22 |
23 | @staticmethod
24 | def conv2d(inputdata, out_channel, kernel_size, padding='SAME',
25 | stride=1, w_init=None, b_init=None,
26 | split=1, use_bias=True, data_format='NHWC', name=None):
27 | """
28 | Wrap the tensorflow conv2d function.
29 | :param name: op name
30 | :param inputdata: A 4D tensorflow tensor which must have a known number of channels, but can have other
31 | unknown dimensions.
32 | :param out_channel: number of output channels.
33 | :param kernel_size: int, so only square kernels are supported
34 | :param padding: 'VALID' or 'SAME'
35 | :param stride: int, so only square strides are supported
36 | :param w_init: initializer for convolution weights
37 | :param b_init: initializer for bias
38 | :param split: split channels into groups as in AlexNet, mainly to save GPU memory.
39 | :param use_bias: whether to use bias.
40 | :param data_format: defaults to NHWC per tensorflow convention
41 | :return: tf.Tensor named ``output``
42 | """
43 | with tf.variable_scope(name):
44 | in_shape = inputdata.get_shape().as_list()
45 | channel_axis = 3 if data_format == 'NHWC' else 1
46 | in_channel = in_shape[channel_axis]
47 | assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
48 | assert in_channel % split == 0
49 | assert out_channel % split == 0
50 |
51 | padding = padding.upper()
52 |
53 | if isinstance(kernel_size, list):
54 | filter_shape = [kernel_size[0], kernel_size[1]] + [in_channel // split, out_channel]
55 | else:
56 | filter_shape = [kernel_size, kernel_size] + [in_channel // split, out_channel]
57 |
58 | if isinstance(stride, list):
59 | strides = [1, stride[0], stride[1], 1] if data_format == 'NHWC' \
60 | else [1, 1, stride[0], stride[1]]
61 | else:
62 | strides = [1, stride, stride, 1] if data_format == 'NHWC' \
63 | else [1, 1, stride, stride]
64 |
65 | if w_init is None:
66 | w_init = tf.contrib.layers.variance_scaling_initializer()
67 | if b_init is None:
68 | b_init = tf.constant_initializer()
69 |
70 | w = tf.get_variable('W', filter_shape, initializer=w_init)
71 | b = None
72 |
73 | if use_bias:
74 | b = tf.get_variable('b', [out_channel], initializer=b_init)
75 |
76 | if split == 1:
77 | conv = tf.nn.conv2d(inputdata, w, strides, padding, data_format=data_format)
78 | else:
79 | inputs = tf.split(inputdata, split, channel_axis)
80 | kernels = tf.split(w, split, 3)
81 | outputs = [tf.nn.conv2d(i, k, strides, padding, data_format=data_format)
82 | for i, k in zip(inputs, kernels)]
83 | conv = tf.concat(outputs, channel_axis)
84 |
85 | ret = tf.identity(tf.nn.bias_add(conv, b, data_format=data_format)
86 | if use_bias else conv, name=name)
87 |
88 | return ret
89 |
90 | @staticmethod
91 | def depthwise_conv(input_tensor, kernel_size, name, depth_multiplier=1,
92 | padding='SAME', stride=1):
93 | """
94 |
95 | :param input_tensor:
96 | :param kernel_size:
97 | :param name:
98 | :param depth_multiplier:
99 | :param padding:
100 | :param stride:
101 | :return:
102 | """
103 | with tf.variable_scope(name_or_scope=name):
104 | in_shape = input_tensor.get_shape().as_list()
105 | in_channel = in_shape[3]
106 | padding = padding.upper()
107 |
108 | depthwise_filter_shape = [kernel_size, kernel_size] + [in_channel, depth_multiplier]
109 | w_init = tf.contrib.layers.variance_scaling_initializer()
110 |
111 | depthwise_filter = tf.get_variable(
112 | name='depthwise_filter_w', shape=depthwise_filter_shape,
113 | initializer=w_init
114 | )
115 |
116 | result = tf.nn.depthwise_conv2d(
117 | input=input_tensor,
118 | filter=depthwise_filter,
119 | strides=[1, stride, stride, 1],
120 | padding=padding,
121 | name='depthwise_conv_output'
122 | )
123 | return result
124 |
125 | @staticmethod
126 | def relu(inputdata, name=None):
127 | """
128 |
129 | :param name:
130 | :param inputdata:
131 | :return:
132 | """
133 | return tf.nn.relu(features=inputdata, name=name)
134 |
135 | @staticmethod
136 | def sigmoid(inputdata, name=None):
137 | """
138 |
139 | :param name:
140 | :param inputdata:
141 | :return:
142 | """
143 | return tf.nn.sigmoid(x=inputdata, name=name)
144 |
145 | @staticmethod
146 | def maxpooling(inputdata, kernel_size, stride=None, padding='VALID',
147 | data_format='NHWC', name=None):
148 | """
149 |
150 | :param name:
151 | :param inputdata:
152 | :param kernel_size:
153 | :param stride:
154 | :param padding:
155 | :param data_format:
156 | :return:
157 | """
158 | padding = padding.upper()
159 |
160 | if stride is None:
161 | stride = kernel_size
162 |
163 | if isinstance(kernel_size, list):
164 | kernel = [1, kernel_size[0], kernel_size[1], 1] if data_format == 'NHWC' else \
165 | [1, 1, kernel_size[0], kernel_size[1]]
166 | else:
167 | kernel = [1, kernel_size, kernel_size, 1] if data_format == 'NHWC' \
168 | else [1, 1, kernel_size, kernel_size]
169 |
170 | if isinstance(stride, list):
171 | strides = [1, stride[0], stride[1], 1] if data_format == 'NHWC' \
172 | else [1, 1, stride[0], stride[1]]
173 | else:
174 | strides = [1, stride, stride, 1] if data_format == 'NHWC' \
175 | else [1, 1, stride, stride]
176 |
177 | return tf.nn.max_pool(value=inputdata, ksize=kernel, strides=strides, padding=padding,
178 | data_format=data_format, name=name)
179 |
180 | @staticmethod
181 | def avgpooling(inputdata, kernel_size, stride=None, padding='VALID',
182 | data_format='NHWC', name=None):
183 | """
184 |
185 | :param name:
186 | :param inputdata:
187 | :param kernel_size:
188 | :param stride:
189 | :param padding:
190 | :param data_format:
191 | :return:
192 | """
193 | if stride is None:
194 | stride = kernel_size
195 |
196 | kernel = [1, kernel_size, kernel_size, 1] if data_format == 'NHWC' \
197 | else [1, 1, kernel_size, kernel_size]
198 |
199 | strides = [1, stride, stride, 1] if data_format == 'NHWC' else [1, 1, stride, stride]
200 |
201 | return tf.nn.avg_pool(value=inputdata, ksize=kernel, strides=strides, padding=padding,
202 | data_format=data_format, name=name)
203 |
204 | @staticmethod
205 | def globalavgpooling(inputdata, data_format='NHWC', name=None):
206 | """
207 |
208 | :param name:
209 | :param inputdata:
210 | :param data_format:
211 | :return:
212 | """
213 | assert inputdata.shape.ndims == 4
214 | assert data_format in ['NHWC', 'NCHW']
215 |
216 | axis = [1, 2] if data_format == 'NHWC' else [2, 3]
217 |
218 | return tf.reduce_mean(input_tensor=inputdata, axis=axis, name=name)
219 |
220 | @staticmethod
221 | def layernorm(inputdata, epsilon=1e-5, use_bias=True, use_scale=True,
222 | data_format='NHWC', name=None):
223 | """
224 | :param name:
225 | :param inputdata:
226 | :param epsilon: epsilon to avoid divide-by-zero.
227 | :param use_bias: whether to use the extra affine transformation or not.
228 | :param use_scale: whether to use the extra affine transformation or not.
229 | :param data_format:
230 | :return:
231 | """
232 | shape = inputdata.get_shape().as_list()
233 | ndims = len(shape)
234 | assert ndims in [2, 4]
235 |
236 | mean, var = tf.nn.moments(inputdata, list(range(1, len(shape))), keep_dims=True)
237 |
238 | if data_format == 'NCHW':
239 | channel = shape[1]
240 | new_shape = [1, channel, 1, 1]
241 | else:
242 | channel = shape[-1]
243 | new_shape = [1, 1, 1, channel]
244 | if ndims == 2:
245 | new_shape = [1, channel]
246 |
247 | if use_bias:
248 | beta = tf.get_variable('beta', [channel], initializer=tf.constant_initializer())
249 | beta = tf.reshape(beta, new_shape)
250 | else:
251 | beta = tf.zeros([1] * ndims, name='beta')
252 | if use_scale:
253 | gamma = tf.get_variable('gamma', [channel], initializer=tf.constant_initializer(1.0))
254 | gamma = tf.reshape(gamma, new_shape)
255 | else:
256 | gamma = tf.ones([1] * ndims, name='gamma')
257 |
258 | return tf.nn.batch_normalization(inputdata, mean, var, beta, gamma, epsilon, name=name)
259 |
260 | @staticmethod
261 | def instancenorm(inputdata, epsilon=1e-5, data_format='NHWC', use_affine=True, name=None):
262 | """
263 |
264 | :param name:
265 | :param inputdata:
266 | :param epsilon:
267 | :param data_format:
268 | :param use_affine:
269 | :return:
270 | """
271 | shape = inputdata.get_shape().as_list()
272 | if len(shape) != 4:
273 | raise ValueError("Input data of instancebn layer has to be a 4D tensor")
274 |
275 | if data_format == 'NHWC':
276 | axis = [1, 2]
277 | ch = shape[3]
278 | new_shape = [1, 1, 1, ch]
279 | else:
280 | axis = [2, 3]
281 | ch = shape[1]
282 | new_shape = [1, ch, 1, 1]
283 | if ch is None:
284 | raise ValueError("Input of instancebn requires a known channel!")
285 |
286 | mean, var = tf.nn.moments(inputdata, axis, keep_dims=True)
287 |
288 | if not use_affine:
289 | return tf.divide(inputdata - mean, tf.sqrt(var + epsilon), name='output')
290 |
291 | beta = tf.get_variable('beta', [ch], initializer=tf.constant_initializer())
292 | beta = tf.reshape(beta, new_shape)
293 | gamma = tf.get_variable('gamma', [ch], initializer=tf.constant_initializer(1.0))
294 | gamma = tf.reshape(gamma, new_shape)
295 | return tf.nn.batch_normalization(inputdata, mean, var, beta, gamma, epsilon, name=name)
296 |
297 | @staticmethod
298 | def dropout(inputdata, keep_prob, noise_shape=None, name=None):
299 | """
300 |
301 | :param name:
302 | :param inputdata:
303 | :param keep_prob:
304 | :param noise_shape:
305 | :return:
306 | """
307 | return tf.nn.dropout(inputdata, keep_prob=keep_prob, noise_shape=noise_shape, name=name)
308 |
309 | @staticmethod
310 | def fullyconnect(inputdata, out_dim, w_init=None, b_init=None,
311 | use_bias=True, name=None):
312 | """
313 | Fully-Connected layer, takes a N>1D tensor and returns a 2D tensor.
314 | It is an equivalent of `tf.layers.dense` except for naming conventions.
315 |
316 | :param inputdata: a tensor to be flattened except for the first dimension.
317 | :param out_dim: output dimension
318 | :param w_init: initializer for w. Defaults to `variance_scaling_initializer`.
319 | :param b_init: initializer for b. Defaults to zero
320 | :param use_bias: whether to use bias.
321 | :param name:
322 | :return: tf.Tensor: a NC tensor named ``output`` with attribute `variables`.
323 | """
324 | shape = inputdata.get_shape().as_list()[1:]
325 | if None not in shape:
326 | inputdata = tf.reshape(inputdata, [-1, int(np.prod(shape))])
327 | else:
328 | inputdata = tf.reshape(inputdata, tf.stack([tf.shape(inputdata)[0], -1]))
329 |
330 | if w_init is None:
331 | w_init = tf.contrib.layers.variance_scaling_initializer()
332 | if b_init is None:
333 | b_init = tf.constant_initializer()
334 |
335 | ret = tf.layers.dense(inputs=inputdata, activation=lambda x: tf.identity(x, name='output'),
336 | use_bias=use_bias, name=name,
337 | kernel_initializer=w_init, bias_initializer=b_init,
338 | trainable=True, units=out_dim)
339 | return ret
340 |
341 | @staticmethod
342 | def layerbn(inputdata, is_training, name, scale=True):
343 | """
344 |
345 | :param inputdata:
346 | :param is_training:
347 | :param name:
348 | :param scale:
349 | :return:
350 | """
351 |
352 | return tf.layers.batch_normalization(inputs=inputdata, training=is_training, name=name, scale=scale)
353 |
354 | @staticmethod
355 | def layergn(inputdata, name, group_size=32, esp=1e-5):
356 | """
357 |
358 | :param inputdata:
359 | :param name:
360 | :param group_size:
361 | :param esp:
362 | :return:
363 | """
364 | with tf.variable_scope(name):
365 | inputdata = tf.transpose(inputdata, [0, 3, 1, 2])
366 | n, c, h, w = inputdata.get_shape().as_list()
367 | group_size = min(group_size, c)
368 | inputdata = tf.reshape(inputdata, [-1, group_size, c // group_size, h, w])
369 | mean, var = tf.nn.moments(inputdata, [2, 3, 4], keep_dims=True)
370 | inputdata = (inputdata - mean) / tf.sqrt(var + esp)
371 |
372 | # per-channel gamma and beta
373 | gamma = tf.Variable(tf.constant(1.0, shape=[c]), dtype=tf.float32, name='gamma')
374 | beta = tf.Variable(tf.constant(0.0, shape=[c]), dtype=tf.float32, name='beta')
375 | gamma = tf.reshape(gamma, [1, c, 1, 1])
376 | beta = tf.reshape(beta, [1, c, 1, 1])
377 |
378 | # reshape back to [n, c, h, w] per the paper, then transpose to [n, h, w, c]
379 | output = tf.reshape(inputdata, [-1, c, h, w])
380 | output = output * gamma + beta
381 | output = tf.transpose(output, [0, 2, 3, 1])
382 |
383 | return output
384 |
385 | @staticmethod
386 | def squeeze(inputdata, axis=None, name=None):
387 | """
388 |
389 | :param inputdata:
390 | :param axis:
391 | :param name:
392 | :return:
393 | """
394 | return tf.squeeze(input=inputdata, axis=axis, name=name)
395 |
396 | @staticmethod
397 | def deconv2d(inputdata, out_channel, kernel_size, padding='SAME',
398 | stride=1, w_init=None, b_init=None,
399 | use_bias=True, activation=None, data_format='channels_last',
400 | trainable=True, name=None):
401 | """
402 | Wrap the tensorflow conv2d_transpose function.
403 | :param name: op name
404 | :param inputdata: A 4D tensorflow tensor which must have a known number of channels, but can have other
405 | unknown dimensions.
406 | :param out_channel: number of output channels.
407 | :param kernel_size: int, so only square kernels are supported
408 | :param padding: 'VALID' or 'SAME'
409 | :param stride: int, so only square strides are supported
410 | :param w_init: initializer for convolution weights
411 | :param b_init: initializer for bias
412 | :param activation: activation function to apply to the deconv result, if any
413 | :param use_bias: whether to use bias.
414 | :param data_format: defaults to channels_last per tensorflow convention
415 | :return: tf.Tensor named ``output``
416 | """
417 | with tf.variable_scope(name):
418 | in_shape = inputdata.get_shape().as_list()
419 | channel_axis = 3 if data_format == 'channels_last' else 1
420 | in_channel = in_shape[channel_axis]
421 | assert in_channel is not None, "[Deconv2D] Input cannot have unknown channel!"
422 |
423 | padding = padding.upper()
424 |
425 | if w_init is None:
426 | w_init = tf.contrib.layers.variance_scaling_initializer()
427 | if b_init is None:
428 | b_init = tf.constant_initializer()
429 |
430 | ret = tf.layers.conv2d_transpose(inputs=inputdata, filters=out_channel,
431 | kernel_size=kernel_size,
432 | strides=stride, padding=padding,
433 | data_format=data_format,
434 | activation=activation, use_bias=use_bias,
435 | kernel_initializer=w_init,
436 | bias_initializer=b_init, trainable=trainable,
437 | name=name)
438 | return ret
439 |
440 | @staticmethod
441 | def dilation_conv(input_tensor, k_size, out_dims, rate, padding='SAME',
442 | w_init=None, b_init=None, use_bias=False, name=None):
443 | """
444 |
445 | :param input_tensor:
446 | :param k_size:
447 | :param out_dims:
448 | :param rate:
449 | :param padding:
450 | :param w_init:
451 | :param b_init:
452 | :param use_bias:
453 | :param name:
454 | :return:
455 | """
456 | with tf.variable_scope(name):
457 | in_shape = input_tensor.get_shape().as_list()
458 | in_channel = in_shape[3]
459 | assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
460 |
461 | padding = padding.upper()
462 |
463 | if isinstance(k_size, list):
464 | filter_shape = [k_size[0], k_size[1]] + [in_channel, out_dims]
465 | else:
466 | filter_shape = [k_size, k_size] + [in_channel, out_dims]
467 |
468 | if w_init is None:
469 | w_init = tf.contrib.layers.variance_scaling_initializer()
470 | if b_init is None:
471 | b_init = tf.constant_initializer()
472 |
473 | w = tf.get_variable('W', filter_shape, initializer=w_init)
474 | b = None
475 |
476 | if use_bias:
477 | b = tf.get_variable('b', [out_dims], initializer=b_init)
478 |
479 | conv = tf.nn.atrous_conv2d(value=input_tensor, filters=w, rate=rate,
480 | padding=padding, name='dilation_conv')
481 |
482 | if use_bias:
483 | ret = tf.add(conv, b)
484 | else:
485 | ret = conv
486 |
487 | return ret
488 |
489 | @staticmethod
490 | def spatial_dropout(input_tensor, keep_prob, is_training, name, seed=1234):
491 | """
492 | Spatial dropout implementation
493 | :param input_tensor:
494 | :param keep_prob:
495 | :param is_training:
496 | :param name:
497 | :param seed:
498 | :return:
499 | """
500 |
501 | def f1():
502 | input_shape = input_tensor.get_shape().as_list()
503 | noise_shape = tf.constant(value=[input_shape[0], 1, 1, input_shape[3]])
504 | return tf.nn.dropout(input_tensor, keep_prob, noise_shape, seed=seed, name="spatial_dropout")
505 |
506 | def f2():
507 | return input_tensor
508 |
509 | with tf.variable_scope(name_or_scope=name):
510 |
511 | output = tf.cond(is_training, f1, f2)
512 |
513 | return output
514 |
515 | @staticmethod
516 | def lrelu(inputdata, name, alpha=0.2):
517 | """
518 |
519 | :param inputdata:
520 | :param alpha:
521 | :param name:
522 | :return:
523 | """
524 | with tf.variable_scope(name):
525 | return tf.nn.relu(inputdata) - alpha * tf.nn.relu(-inputdata)
526 |
--------------------------------------------------------------------------------
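Since `layergn` is the least self-explanatory wrapper in this file, here is a small NumPy reference of the same group-normalization math, assuming NHWC input like the TensorFlow version. It is a sketch for intuition only, not a drop-in replacement: it omits the learned gamma/beta, which start at 1 and 0 anyway:

import numpy as np

def group_norm_ref(x, group_size=32, eps=1e-5):
    # normalize an NHWC tensor over (channels-within-group, H, W), per sample
    n, h, w, c = x.shape
    g = min(group_size, c)
    x = x.transpose(0, 3, 1, 2)                # NHWC -> NCHW
    x = x.reshape(n, g, c // g, h, w)          # split channels into g groups
    mean = x.mean(axis=(2, 3, 4), keepdims=True)
    var = x.var(axis=(2, 3, 4), keepdims=True)
    x = (x - mean) / np.sqrt(var + eps)
    return x.reshape(n, c, h, w).transpose(0, 2, 3, 1)  # back to NHWC

out = group_norm_ref(np.random.rand(2, 8, 8, 64))
print(out.shape, round(out.mean(), 3), round(out.std(), 3))  # (2, 8, 8, 64), ~0.0, ~1.0
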
/src/LaneDetection/models/LaneNet/bisenet_v2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # @Time : 2020/4/9 11:05 AM
4 | # @Author : MaybeShewill-CV
5 | # @Site : https://github.com/MaybeShewill-CV/bisenetv2-tensorflow
6 | # @File : bisenet_v2.py
7 | # @IDE: PyCharm
8 | """
9 | BiseNet V2 Model
10 | """
11 | import collections
12 |
13 | import tensorflow as tf
14 |
15 | import cnn_basenet
16 | import parse_config_utils
17 |
18 |
19 | class _StemBlock(cnn_basenet.CNNBaseModel):
20 | """
21 | implementation of stem block module
22 | """
23 | def __init__(self, phase):
24 | """
25 |
26 | :param phase:
27 | """
28 | super(_StemBlock, self).__init__()
29 | self._phase = phase
30 | self._is_training = self._is_net_for_training()
31 | self._padding = 'SAME'
32 |
33 | def _is_net_for_training(self):
34 | """
35 | if the net is used for training or not
36 | :return:
37 | """
38 | if isinstance(self._phase, tf.Tensor):
39 | phase = self._phase
40 | else:
41 | phase = tf.constant(self._phase, dtype=tf.string)
42 | return tf.equal(phase, tf.constant('train', dtype=tf.string))
43 |
44 | def _conv_block(self, input_tensor, k_size, output_channels, stride,
45 | name, padding='SAME', use_bias=False, need_activate=False):
46 | """
47 | conv + bn block with optional relu activation
48 | :param input_tensor:
49 | :param k_size:
50 | :param output_channels:
51 | :param stride:
52 | :param name:
53 | :param padding:
54 | :param use_bias:
55 | :return:
56 | """
57 | with tf.variable_scope(name_or_scope=name):
58 | result = self.conv2d(
59 | inputdata=input_tensor,
60 | out_channel=output_channels,
61 | kernel_size=k_size,
62 | padding=padding,
63 | stride=stride,
64 | use_bias=use_bias,
65 | name='conv'
66 | )
67 | if need_activate:
68 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
69 | result = self.relu(inputdata=result, name='relu')
70 | else:
71 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
72 | return result
73 |
74 | def __call__(self, *args, **kwargs):
75 | """
76 |
77 | :param args:
78 | :param kwargs:
79 | :return:
80 | """
81 | input_tensor = kwargs['input_tensor']
82 | name_scope = kwargs['name']
83 | output_channels = kwargs['output_channels']
84 | if 'padding' in kwargs:
85 | self._padding = kwargs['padding']
86 | with tf.variable_scope(name_or_scope=name_scope):
87 | input_tensor = self._conv_block(
88 | input_tensor=input_tensor,
89 | k_size=3,
90 | output_channels=output_channels,
91 | stride=2,
92 | name='conv_block_1',
93 | padding=self._padding,
94 | use_bias=False,
95 | need_activate=True
96 | )
97 | with tf.variable_scope(name_or_scope='downsample_branch_left'):
98 | branch_left_output = self._conv_block(
99 | input_tensor=input_tensor,
100 | k_size=1,
101 | output_channels=int(output_channels / 2),
102 | stride=1,
103 | name='1x1_conv_block',
104 | padding=self._padding,
105 | use_bias=False,
106 | need_activate=True
107 | )
108 | branch_left_output = self._conv_block(
109 | input_tensor=branch_left_output,
110 | k_size=3,
111 | output_channels=output_channels,
112 | stride=2,
113 | name='3x3_conv_block',
114 | padding=self._padding,
115 | use_bias=False,
116 | need_activate=True
117 | )
118 | with tf.variable_scope(name_or_scope='downsample_branch_right'):
119 | branch_right_output = self.maxpooling(
120 | inputdata=input_tensor,
121 | kernel_size=3,
122 | stride=2,
123 | padding=self._padding,
124 | name='maxpooling_block'
125 | )
126 | result = tf.concat([branch_left_output, branch_right_output], axis=-1, name='concate_features')
127 | result = self._conv_block(
128 | input_tensor=result,
129 | k_size=3,
130 | output_channels=output_channels,
131 | stride=1,
132 | name='final_conv_block',
133 | padding=self._padding,
134 | use_bias=False,
135 | need_activate=True
136 | )
137 | return result
138 |
139 |
140 | class _ContextEmbedding(cnn_basenet.CNNBaseModel):
141 | """
142 | implementation of context embedding module in bisenetv2
143 | """
144 | def __init__(self, phase):
145 | """
146 |
147 | :param phase:
148 | """
149 | super(_ContextEmbedding, self).__init__()
150 | self._phase = phase
151 | self._is_training = self._is_net_for_training()
152 | self._padding = 'SAME'
153 |
154 | def _is_net_for_training(self):
155 | """
156 | if the net is used for training or not
157 | :return:
158 | """
159 | if isinstance(self._phase, tf.Tensor):
160 | phase = self._phase
161 | else:
162 | phase = tf.constant(self._phase, dtype=tf.string)
163 | return tf.equal(phase, tf.constant('train', dtype=tf.string))
164 |
165 | def _conv_block(self, input_tensor, k_size, output_channels, stride,
166 | name, padding='SAME', use_bias=False, need_activate=False):
167 | """
168 | conv + bn block with optional relu activation
169 | :param input_tensor:
170 | :param k_size:
171 | :param output_channels:
172 | :param stride:
173 | :param name:
174 | :param padding:
175 | :param use_bias:
176 | :return:
177 | """
178 | with tf.variable_scope(name_or_scope=name):
179 | result = self.conv2d(
180 | inputdata=input_tensor,
181 | out_channel=output_channels,
182 | kernel_size=k_size,
183 | padding=padding,
184 | stride=stride,
185 | use_bias=use_bias,
186 | name='conv'
187 | )
188 | if need_activate:
189 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
190 | result = self.relu(inputdata=result, name='relu')
191 | else:
192 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
193 | return result
194 |
195 | def __call__(self, *args, **kwargs):
196 | """
197 |
198 | :param args:
199 | :param kwargs:
200 | :return:
201 | """
202 | input_tensor = kwargs['input_tensor']
203 | name_scope = kwargs['name']
204 | output_channels = input_tensor.get_shape().as_list()[-1]
205 | if 'padding' in kwargs:
206 | self._padding = kwargs['padding']
207 | with tf.variable_scope(name_or_scope=name_scope):
208 | result = tf.reduce_mean(input_tensor, axis=[1, 2], keepdims=True, name='global_avg_pooling')
209 | result = self.layerbn(result, self._is_training, 'bn')
210 | result = self._conv_block(
211 | input_tensor=result,
212 | k_size=1,
213 | output_channels=output_channels,
214 | stride=1,
215 | name='conv_block_1',
216 | padding=self._padding,
217 | use_bias=False,
218 | need_activate=True
219 | )
220 | result = tf.add(result, input_tensor, name='fused_features')
221 | result = self.conv2d(
222 | inputdata=result,
223 | out_channel=output_channels,
224 | kernel_size=3,
225 | padding=self._padding,
226 | stride=1,
227 | use_bias=False,
228 | name='final_conv_block'
229 | )
230 | return result
231 |
232 |
233 | class _GatherExpansion(cnn_basenet.CNNBaseModel):
234 | """
235 | implementation of gather and expansion module in bisenetv2
236 | """
237 | def __init__(self, phase):
238 | """
239 |
240 | :param phase:
241 | """
242 | super(_GatherExpansion, self).__init__()
243 | self._phase = phase
244 | self._is_training = self._is_net_for_training()
245 | self._padding = 'SAME'
246 | self._stride = 1
247 | self._expansion_factor = 6
248 |
249 | def _is_net_for_training(self):
250 | """
251 | if the net is used for training or not
252 | :return:
253 | """
254 | if isinstance(self._phase, tf.Tensor):
255 | phase = self._phase
256 | else:
257 | phase = tf.constant(self._phase, dtype=tf.string)
258 | return tf.equal(phase, tf.constant('train', dtype=tf.string))
259 |
260 | def _conv_block(self, input_tensor, k_size, output_channels, stride,
261 | name, padding='SAME', use_bias=False, need_activate=False):
262 | """
263 | conv + bn block with optional relu activation
264 | :param input_tensor:
265 | :param k_size:
266 | :param output_channels:
267 | :param stride:
268 | :param name:
269 | :param padding:
270 | :param use_bias:
271 | :return:
272 | """
273 | with tf.variable_scope(name_or_scope=name):
274 | result = self.conv2d(
275 | inputdata=input_tensor,
276 | out_channel=output_channels,
277 | kernel_size=k_size,
278 | padding=padding,
279 | stride=stride,
280 | use_bias=use_bias,
281 | name='conv'
282 | )
283 | if need_activate:
284 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
285 | result = self.relu(inputdata=result, name='relu')
286 | else:
287 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
288 | return result
289 |
290 | def _apply_ge_when_stride_equal_one(self, input_tensor, e, name):
291 | """
292 |
293 | :param input_tensor:
294 | :param e:
295 | :param name
296 | :return:
297 | """
298 | input_tensor_channels = input_tensor.get_shape().as_list()[-1]
299 | with tf.variable_scope(name_or_scope=name):
300 | result = self._conv_block(
301 | input_tensor=input_tensor,
302 | k_size=3,
303 | output_channels=input_tensor_channels,
304 | stride=1,
305 | name='3x3_conv_block',
306 | padding=self._padding,
307 | use_bias=False,
308 | need_activate=True
309 | )
310 | result = self.depthwise_conv(
311 | input_tensor=result,
312 | kernel_size=3,
313 | depth_multiplier=e,
314 | padding=self._padding,
315 | stride=1,
316 | name='depthwise_conv_block'
317 | )
318 | result = self.layerbn(result, self._is_training, name='dw_bn')
319 | result = self._conv_block(
320 | input_tensor=result,
321 | k_size=1,
322 | output_channels=input_tensor_channels,
323 | stride=1,
324 | name='1x1_conv_block',
325 | padding=self._padding,
326 | use_bias=False,
327 | need_activate=False
328 | )
329 | result = tf.add(input_tensor, result, name='fused_features')
330 | result = self.relu(result, name='ge_output')
331 | return result
332 |
333 | def _apply_ge_when_stride_equal_two(self, input_tensor, output_channels, e, name):
334 | """
335 |
336 | :param input_tensor:
337 | :param output_channels:
338 | :param e:
339 | :param name
340 | :return:
341 | """
342 | input_tensor_channels = input_tensor.get_shape().as_list()[-1]
343 | with tf.variable_scope(name_or_scope=name):
344 | input_proj = self.depthwise_conv(
345 | input_tensor=input_tensor,
346 | kernel_size=3,
347 | name='input_project_dw_conv_block',
348 | depth_multiplier=1,
349 | padding=self._padding,
350 | stride=self._stride
351 | )
352 | input_proj = self.layerbn(input_proj, self._is_training, name='input_project_bn')
353 | input_proj = self._conv_block(
354 | input_tensor=input_proj,
355 | k_size=1,
356 | output_channels=output_channels,
357 | stride=1,
358 | name='input_project_1x1_conv_block',
359 | padding=self._padding,
360 | use_bias=False,
361 | need_activate=False
362 | )
363 |
364 | result = self._conv_block(
365 | input_tensor=input_tensor,
366 | k_size=3,
367 | output_channels=input_tensor_channels,
368 | stride=1,
369 | name='3x3_conv_block',
370 | padding=self._padding,
371 | use_bias=False,
372 | need_activate=True
373 | )
374 | result = self.depthwise_conv(
375 | input_tensor=result,
376 | kernel_size=3,
377 | depth_multiplier=e,
378 | padding=self._padding,
379 | stride=2,
380 | name='depthwise_conv_block_1'
381 | )
382 | result = self.layerbn(result, self._is_training, name='dw_bn_1')
383 | result = self.depthwise_conv(
384 | input_tensor=result,
385 | kernel_size=3,
386 | depth_multiplier=1,
387 | padding=self._padding,
388 | stride=1,
389 | name='depthwise_conv_block_2'
390 | )
391 | result = self.layerbn(result, self._is_training, name='dw_bn_2')
392 | result = self._conv_block(
393 | input_tensor=result,
394 | k_size=1,
395 | output_channels=output_channels,
396 | stride=1,
397 | name='1x1_conv_block',
398 | padding=self._padding,
399 | use_bias=False,
400 | need_activate=False
401 | )
402 | result = tf.add(input_proj, result, name='fused_features')
403 | result = self.relu(result, name='ge_output')
404 | return result
405 |
406 | def __call__(self, *args, **kwargs):
407 | """
408 |
409 | :param args:
410 | :param kwargs:
411 | :return:
412 | """
413 | input_tensor = kwargs['input_tensor']
414 | name_scope = kwargs['name']
415 | output_channels = input_tensor.get_shape().as_list()[-1]
416 | if 'output_channels' in kwargs:
417 | output_channels = kwargs['output_channels']
418 | if 'padding' in kwargs:
419 | self._padding = kwargs['padding']
420 | if 'stride' in kwargs:
421 | self._stride = kwargs['stride']
422 | if 'e' in kwargs:
423 | self._expansion_factor = kwargs['e']
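    | # NOTE: these kwargs overwrite instance state, so padding/stride/e
    | # persist into later calls unless they are passed again.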
424 |
425 | with tf.variable_scope(name_or_scope=name_scope):
426 | if self._stride == 1:
427 | result = self._apply_ge_when_stride_equal_one(
428 | input_tensor=input_tensor,
429 | e=self._expansion_factor,
430 | name='stride_equal_one_module'
431 | )
432 | elif self._stride == 2:
433 | result = self._apply_ge_when_stride_equal_two(
434 | input_tensor=input_tensor,
435 | output_channels=output_channels,
436 | e=self._expansion_factor,
437 | name='stride_equal_two_module'
438 | )
439 | else:
440 | raise NotImplementedError('No GE module implemented for stride {}'.format(self._stride))
441 | return result
442 |
443 |
444 | class _GuidedAggregation(cnn_basenet.CNNBaseModel):
445 | """
446 | implementation of guided aggregation module in bisenetv2
447 | """
448 |
449 | def __init__(self, phase):
450 | """
451 |
452 | :param phase:
453 | """
454 | super(_GuidedAggregation, self).__init__()
455 | self._phase = phase
456 | self._is_training = self._is_net_for_training()
457 | self._padding = 'SAME'
458 |
459 | def _is_net_for_training(self):
460 | """
461 | if the net is used for training or not
462 | :return:
463 | """
464 | if isinstance(self._phase, tf.Tensor):
465 | phase = self._phase
466 | else:
467 | phase = tf.constant(self._phase, dtype=tf.string)
468 | return tf.equal(phase, tf.constant('train', dtype=tf.string))
469 |
470 | def _conv_block(self, input_tensor, k_size, output_channels, stride,
471 | name, padding='SAME', use_bias=False, need_activate=False):
472 | """
473 | conv block in attention refine
474 | :param input_tensor:
475 | :param k_size:
476 | :param output_channels:
477 | :param stride:
478 | :param name:
479 | :param padding:
480 | :param use_bias:
481 | :return:
482 | """
483 | with tf.variable_scope(name_or_scope=name):
484 | result = self.conv2d(
485 | inputdata=input_tensor,
486 | out_channel=output_channels,
487 | kernel_size=k_size,
488 | padding=padding,
489 | stride=stride,
490 | use_bias=use_bias,
491 | name='conv'
492 | )
493 | if need_activate:
494 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
495 | result = self.relu(inputdata=result, name='relu')
496 | else:
497 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
498 | return result
499 |
500 | def __call__(self, *args, **kwargs):
501 | """
502 |
503 | :param args:
504 | :param kwargs:
505 | :return:
506 | """
507 | detail_input_tensor = kwargs['detail_input_tensor']
508 | semantic_input_tensor = kwargs['semantic_input_tensor']
509 | name_scope = kwargs['name']
510 | output_channels = detail_input_tensor.get_shape().as_list()[-1]
511 | if 'padding' in kwargs:
512 | self._padding = kwargs['padding']
513 |
514 | with tf.variable_scope(name_or_scope=name_scope):
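    | # Detail branch contributes two paths: one at full feature resolution
    | # and one downsampled 4x (stride-2 conv followed by stride-2 avg pool).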
515 | with tf.variable_scope(name_or_scope='detail_branch'):
516 | detail_branch_remain = self.depthwise_conv(
517 | input_tensor=detail_input_tensor,
518 | kernel_size=3,
519 | name='3x3_dw_conv_block',
520 | depth_multiplier=1,
521 | padding=self._padding,
522 | stride=1
523 | )
524 | detail_branch_remain = self.layerbn(detail_branch_remain, self._is_training, name='bn_1')
525 | detail_branch_remain = self.conv2d(
526 | inputdata=detail_branch_remain,
527 | out_channel=output_channels,
528 | kernel_size=1,
529 | padding=self._padding,
530 | stride=1,
531 | use_bias=False,
532 | name='1x1_conv_block'
533 | )
534 |
535 | detail_branch_downsample = self._conv_block(
536 | input_tensor=detail_input_tensor,
537 | k_size=3,
538 | output_channels=output_channels,
539 | stride=2,
540 | name='3x3_conv_block',
541 | padding=self._padding,
542 | use_bias=False,
543 | need_activate=False
544 | )
545 | detail_branch_downsample = self.avgpooling(
546 | inputdata=detail_branch_downsample,
547 | kernel_size=3,
548 | stride=2,
549 | padding=self._padding,
550 | name='avg_pooling_block'
551 | )
552 |
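    | # Semantic branch yields sigmoid attention maps at both resolutions,
    | # which gate the detail features below.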
553 | with tf.variable_scope(name_or_scope='semantic_branch'):
554 | semantic_branch_remain = self.depthwise_conv(
555 | input_tensor=semantic_input_tensor,
556 | kernel_size=3,
557 | name='3x3_dw_conv_block',
558 | depth_multiplier=1,
559 | padding=self._padding,
560 | stride=1
561 | )
562 | semantic_branch_remain = self.layerbn(semantic_branch_remain, self._is_training, name='bn_1')
563 | semantic_branch_remain = self.conv2d(
564 | inputdata=semantic_branch_remain,
565 | out_channel=output_channels,
566 | kernel_size=1,
567 | padding=self._padding,
568 | stride=1,
569 | use_bias=False,
570 | name='1x1_conv_block'
571 | )
572 | semantic_branch_remain = self.sigmoid(semantic_branch_remain, name='semantic_remain_sigmoid')
573 |
574 | semantic_branch_upsample = self._conv_block(
575 | input_tensor=semantic_input_tensor,
576 | k_size=3,
577 | output_channels=output_channels,
578 | stride=1,
579 | name='3x3_conv_block',
580 | padding=self._padding,
581 | use_bias=False,
582 | need_activate=False
583 | )
584 | semantic_branch_upsample = tf.image.resize_bilinear(
585 | semantic_branch_upsample,
586 | detail_input_tensor.get_shape().as_list()[1:3],
587 | name='semantic_upsample_features'
588 | )
589 | semantic_branch_upsample = self.sigmoid(semantic_branch_upsample, name='semantic_upsample_sigmoid')
590 |
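    | # Bidirectional guidance: full-res detail features are weighted by the
    | # upsampled semantic attention, downsampled detail features by the
    | # native-res semantic attention; the two products are summed after
    | # upsampling and fused by a final 3x3 conv block.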
591 | with tf.variable_scope(name_or_scope='aggregation_features'):
592 | guided_features_remain = tf.multiply(
593 | detail_branch_remain,
594 | semantic_branch_upsample,
595 | name='guided_detail_features'
596 | )
597 | guided_features_downsample = tf.multiply(
598 | detail_branch_downsample,
599 | semantic_branch_remain,
600 | name='guided_semantic_features'
601 | )
602 | guided_features_upsample = tf.image.resize_bilinear(
603 | guided_features_downsample,
604 | detail_input_tensor.get_shape().as_list()[1:3],
605 | name='guided_upsample_features'
606 | )
607 | guided_features = tf.add(guided_features_remain, guided_features_upsample, name='fused_features')
608 | guided_features = self._conv_block(
609 | input_tensor=guided_features,
610 | k_size=3,
611 | output_channels=output_channels,
612 | stride=1,
613 | name='aggregation_feature_output',
614 | padding=self._padding,
615 | use_bias=False,
616 | need_activate=True
617 | )
618 | return guided_features
619 |
620 |
621 | class _SegmentationHead(cnn_basenet.CNNBaseModel):
622 | """
623 | implementation of segmentation head in bisenet v2
624 | """
625 | def __init__(self, phase):
626 | """
627 |
628 | """
629 | super(_SegmentationHead, self).__init__()
630 | self._phase = phase
631 | self._is_training = self._is_net_for_training()
632 | self._padding = 'SAME'
633 |
634 | def _is_net_for_training(self):
635 | """
636 | if the net is used for training or not
637 | :return:
638 | """
639 | if isinstance(self._phase, tf.Tensor):
640 | phase = self._phase
641 | else:
642 | phase = tf.constant(self._phase, dtype=tf.string)
643 | return tf.equal(phase, tf.constant('train', dtype=tf.string))
644 |
645 | def _conv_block(self, input_tensor, k_size, output_channels, stride,
646 | name, padding='SAME', use_bias=False, need_activate=False):
647 | """
648 | conv block in attention refine
649 | :param input_tensor:
650 | :param k_size:
651 | :param output_channels:
652 | :param stride:
653 | :param name:
654 | :param padding:
655 | :param use_bias:
656 | :return:
657 | """
658 | with tf.variable_scope(name_or_scope=name):
659 | result = self.conv2d(
660 | inputdata=input_tensor,
661 | out_channel=output_channels,
662 | kernel_size=k_size,
663 | padding=padding,
664 | stride=stride,
665 | use_bias=use_bias,
666 | name='conv'
667 | )
668 | if need_activate:
669 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
670 | result = self.relu(inputdata=result, name='relu')
671 | else:
672 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
673 | return result
674 |
675 | def __call__(self, *args, **kwargs):
676 | """
677 |
678 | :param args:
679 | :param kwargs:
680 | :return:
681 | """
682 | input_tensor = kwargs['input_tensor']
683 | name_scope = kwargs['name']
684 | ratio = kwargs['upsample_ratio']
685 | input_tensor_size = input_tensor.get_shape().as_list()[1:3]
686 | output_tensor_size = [int(tmp * ratio) for tmp in input_tensor_size]
687 | feature_dims = kwargs['feature_dims']
688 | classes_nums = kwargs['classes_nums']
689 | if 'padding' in kwargs:
690 | self._padding = kwargs['padding']
691 |
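    | # 3x3 conv to feature_dims, 1x1 conv to classes_nums logits, then
    | # bilinear upsampling by upsample_ratio.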
692 | with tf.variable_scope(name_or_scope=name_scope):
693 | result = self._conv_block(
694 | input_tensor=input_tensor,
695 | k_size=3,
696 | output_channels=feature_dims,
697 | stride=1,
698 | name='3x3_conv_block',
699 | padding=self._padding,
700 | use_bias=False,
701 | need_activate=True
702 | )
703 | result = self.conv2d(
704 | inputdata=result,
705 | out_channel=classes_nums,
706 | kernel_size=1,
707 | padding=self._padding,
708 | stride=1,
709 | use_bias=False,
710 | name='1x1_conv_block'
711 | )
712 | result = tf.image.resize_bilinear(
713 | result,
714 | output_tensor_size,
715 | name='segmentation_head_logits'
716 | )
717 | return result
718 |
719 |
720 | class BiseNetV2(cnn_basenet.CNNBaseModel):
721 | """
722 | implementation of bisenet v2
723 | """
724 | def __init__(self, phase, cfg):
725 | """
726 |
727 | """
728 | super(BiseNetV2, self).__init__()
729 | self._cfg = cfg
730 | self._phase = phase
731 | self._is_training = self._is_net_for_training()
732 |
733 | # set model hyper params
734 | self._class_nums = self._cfg.DATASET.NUM_CLASSES
735 | self._weights_decay = self._cfg.SOLVER.WEIGHT_DECAY
736 | self._loss_type = self._cfg.SOLVER.LOSS_TYPE
737 | self._enable_ohem = self._cfg.SOLVER.OHEM.ENABLE
738 | if self._enable_ohem:
739 | self._ohem_score_thresh = self._cfg.SOLVER.OHEM.SCORE_THRESH
740 | self._ohem_min_sample_nums = self._cfg.SOLVER.OHEM.MIN_SAMPLE_NUMS
741 | self._ge_expand_ratio = self._cfg.MODEL.BISENETV2.GE_EXPAND_RATIO
742 | self._semantic_channel_ratio = self._cfg.MODEL.BISENETV2.SEMANTIC_CHANNEL_LAMBDA
743 | self._seg_head_ratio = self._cfg.MODEL.BISENETV2.SEGHEAD_CHANNEL_EXPAND_RATIO
744 |
745 | # set module used in bisenetv2
746 | self._se_block = _StemBlock(phase=phase)
747 | self._context_embedding_block = _ContextEmbedding(phase=phase)
748 | self._ge_block = _GatherExpansion(phase=phase)
749 | self._guided_aggregation_block = _GuidedAggregation(phase=phase)
750 | self._seg_head_block = _SegmentationHead(phase=phase)
751 |
752 | # set detail branch channels
753 | self._detail_branch_channels = self._build_detail_branch_hyper_params()
754 | # set semantic branch channels
755 | self._semantic_branch_channels = self._build_semantic_branch_hyper_params()
756 |
757 | # set op block params
758 | self._block_maps = {
759 | 'conv_block': self._conv_block,
760 | 'se': self._se_block,
761 | 'ge': self._ge_block,
762 | 'ce': self._context_embedding_block,
763 | }
764 |
765 | self._net_intermediate_results = collections.OrderedDict()
766 |
767 | def _is_net_for_training(self):
768 | """
769 | if the net is used for training or not
770 | :return:
771 | """
772 | if isinstance(self._phase, tf.Tensor):
773 | phase = self._phase
774 | else:
775 | phase = tf.constant(self._phase, dtype=tf.string)
776 | return tf.equal(phase, tf.constant('train', dtype=tf.string))
777 |
778 | @classmethod
779 | def _build_detail_branch_hyper_params(cls):
780 | """
781 |
782 | :return:
783 | """
784 | params = [
785 | ('stage_1', [('conv_block', 3, 64, 2, 1), ('conv_block', 3, 64, 1, 1)]),
786 | ('stage_2', [('conv_block', 3, 64, 2, 1), ('conv_block', 3, 64, 1, 2)]),
787 | ('stage_3', [('conv_block', 3, 128, 2, 1), ('conv_block', 3, 128, 1, 2)]),
788 | ]
789 | return collections.OrderedDict(params)
790 |
791 | def _build_semantic_branch_hyper_params(self):
792 | """
793 |
794 | :return:
795 | """
796 | stage_1_channels = int(self._detail_branch_channels['stage_1'][0][2] * self._semantic_channel_ratio)
797 | stage_3_channels = int(self._detail_branch_channels['stage_3'][0][2] * self._semantic_channel_ratio)
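    | # Each tuple: (block_type, kernel_size, output_channels, expansion_factor, stride, repeat_times).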
798 | params = [
799 | ('stage_1', [('se', 3, stage_1_channels, 1, 4, 1)]),
800 | ('stage_3', [('ge', 3, stage_3_channels, self._ge_expand_ratio, 2, 1),
801 | ('ge', 3, stage_3_channels, self._ge_expand_ratio, 1, 1)]),
802 | ('stage_4', [('ge', 3, stage_3_channels * 2, self._ge_expand_ratio, 2, 1),
803 | ('ge', 3, stage_3_channels * 2, self._ge_expand_ratio, 1, 1)]),
804 | ('stage_5', [('ge', 3, stage_3_channels * 4, self._ge_expand_ratio, 2, 1),
805 | ('ge', 3, stage_3_channels * 4, self._ge_expand_ratio, 1, 3),
806 | ('ce', 3, stage_3_channels * 4, self._ge_expand_ratio, 1, 1)])
807 | ]
808 | return collections.OrderedDict(params)
809 |
810 | def _conv_block(self, input_tensor, k_size, output_channels, stride,
811 | name, padding='SAME', use_bias=False, need_activate=False):
812 | """
813 | conv block in attention refine
814 | :param input_tensor:
815 | :param k_size:
816 | :param output_channels:
817 | :param stride:
818 | :param name:
819 | :param padding:
820 | :param use_bias:
821 | :return:
822 | """
823 | with tf.variable_scope(name_or_scope=name):
824 | result = self.conv2d(
825 | inputdata=input_tensor,
826 | out_channel=output_channels,
827 | kernel_size=k_size,
828 | padding=padding,
829 | stride=stride,
830 | use_bias=use_bias,
831 | name='conv'
832 | )
833 | if need_activate:
834 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
835 | result = self.relu(inputdata=result, name='relu')
836 | else:
837 | result = self.layerbn(inputdata=result, is_training=self._is_training, name='bn', scale=True)
838 | return result
839 |
840 | def build_detail_branch(self, input_tensor, name):
841 | """
842 |
843 | :param input_tensor:
844 | :param name:
845 | :return:
846 | """
847 | result = input_tensor
848 | with tf.variable_scope(name_or_scope=name):
849 | for stage_name, stage_params in self._detail_branch_channels.items():
850 | with tf.variable_scope(stage_name):
851 | for block_index, param in enumerate(stage_params):
852 | block_op = self._block_maps[param[0]]
853 | k_size = param[1]
854 | output_channels = param[2]
855 | stride = param[3]
856 | repeat_times = param[4]
857 | for repeat_index in range(repeat_times):
858 | with tf.variable_scope(name_or_scope='conv_block_{:d}_repeat_{:d}'.format(
859 | block_index + 1, repeat_index + 1)):
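    | # The final conv of the detail branch (stage_3, block 2, repeat 2)
    | # skips the ReLU so the raw features feed the aggregation module.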
860 | if stage_name == 'stage_3' and block_index == 1 and repeat_index == 1:
861 | result = block_op(
862 | input_tensor=result,
863 | k_size=k_size,
864 | output_channels=output_channels,
865 | stride=stride,
866 | name='3x3_conv',
867 | padding='SAME',
868 | use_bias=False,
869 | need_activate=False
870 | )
871 | else:
872 | result = block_op(
873 | input_tensor=result,
874 | k_size=k_size,
875 | output_channels=output_channels,
876 | stride=stride,
877 | name='3x3_conv',
878 | padding='SAME',
879 | use_bias=False,
880 | need_activate=True
881 | )
882 | return result
883 |
884 | def build_semantic_branch(self, input_tensor, name, prepare_data_for_booster=False):
885 | """
886 |
887 | :param input_tensor:
888 | :param name:
889 | :param prepare_data_for_booster:
890 | :return:
891 | """
892 | seg_head_inputs = collections.OrderedDict()
893 | result = input_tensor
894 | source_input_tensor_size = input_tensor.get_shape().as_list()[1:3]
895 | with tf.variable_scope(name_or_scope=name):
896 | for stage_name, stage_params in self._semantic_branch_channels.items():
897 | seg_head_input = input_tensor
898 | with tf.variable_scope(stage_name):
899 | for block_index, param in enumerate(stage_params):
900 | block_op_name = param[0]
901 | block_op = self._block_maps[block_op_name]
902 | output_channels = param[2]
903 | expand_ratio = param[3]
904 | stride = param[4]
905 | repeat_times = param[5]
906 | for repeat_index in range(repeat_times):
907 | with tf.variable_scope(name_or_scope='{:s}_block_{:d}_repeat_{:d}'.format(
908 | block_op_name, block_index + 1, repeat_index + 1)):
909 | if block_op_name == 'ge':
910 | result = block_op(
911 | input_tensor=result,
912 | name='gather_expansion_block',
913 | stride=stride,
914 | e=expand_ratio,
915 | output_channels=output_channels
916 | )
917 | seg_head_input = result
918 | elif block_op_name == 'ce':
919 | result = block_op(
920 | input_tensor=result,
921 | name='context_embedding_block'
922 | )
923 | elif block_op_name == 'se':
924 | result = block_op(
925 | input_tensor=result,
926 | output_channels=output_channels,
927 | name='stem_block'
928 | )
929 | seg_head_input = result
930 | else:
931 | raise NotImplementedError('Unsupported block type: {:s}'.format(block_op_name))
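    | # Booster training: attach an auxiliary segmentation head to each
    | # stage's output, upsampled back to the input resolution.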
932 | if prepare_data_for_booster:
933 | result_tensor_size = result.get_shape().as_list()[1:3]
934 | result_tensor_dims = result.get_shape().as_list()[-1]
935 | upsample_ratio = int(source_input_tensor_size[0] / result_tensor_size[0])
936 | feature_dims = result_tensor_dims * self._seg_head_ratio
937 | seg_head_inputs[stage_name] = self._seg_head_block(
938 | input_tensor=seg_head_input,
939 | name='block_{:d}_seg_head_block'.format(block_index + 1),
940 | upsample_ratio=upsample_ratio,
941 | feature_dims=feature_dims,
942 | classes_nums=self._class_nums
943 | )
944 | return result, seg_head_inputs
945 |
946 | def build_aggregation_branch(self, detail_output, semantic_output, name):
947 | """
948 |
949 | :param detail_output:
950 | :param semantic_output:
951 | :param name:
952 | :return:
953 | """
954 | with tf.variable_scope(name_or_scope=name):
955 | result = self._guided_aggregation_block(
956 | detail_input_tensor=detail_output,
957 | semantic_input_tensor=semantic_output,
958 | name='guided_aggregation_block'
959 | )
960 | return result
961 |
962 | def build_instance_segmentation_branch(self, input_tensor, name):
963 | """
964 |
965 | :param input_tensor:
966 | :param name:
967 | :return:
968 | """
969 | input_tensor_size = input_tensor.get_shape().as_list()[1:3]
970 | output_tensor_size = [int(tmp * 8) for tmp in input_tensor_size]
971 |
972 | with tf.variable_scope(name_or_scope=name):
973 | output_tensor = self._conv_block(
974 | input_tensor=input_tensor,
975 | k_size=3,
976 | output_channels=64,
977 | stride=1,
978 | name='conv_3x3',
979 | use_bias=False,
980 | need_activate=True
981 | )
982 | output_tensor = self._conv_block(
983 | input_tensor=output_tensor,
984 | k_size=1,
985 | output_channels=128,
986 | stride=1,
987 | name='conv_1x1',
988 | use_bias=False,
989 | need_activate=False
990 | )
991 | output_tensor = tf.image.resize_bilinear(
992 | output_tensor,
993 | output_tensor_size,
994 | name='instance_logits'
995 | )
996 | return output_tensor
997 |
998 | def build_binary_segmentation_branch(self, input_tensor, name):
999 | """
1000 |
1001 | :param input_tensor:
1002 | :param name:
1003 | :return:
1004 | """
1005 | input_tensor_size = input_tensor.get_shape().as_list()[1:3]
1006 | output_tensor_size = [int(tmp * 8) for tmp in input_tensor_size]
1007 |
1008 | with tf.variable_scope(name_or_scope=name):
1009 | output_tensor = self._conv_block(
1010 | input_tensor=input_tensor,
1011 | k_size=3,
1012 | output_channels=64,
1013 | stride=1,
1014 | name='conv_3x3',
1015 | use_bias=False,
1016 | need_activate=True
1017 | )
1018 | output_tensor = self._conv_block(
1019 | input_tensor=output_tensor,
1020 | k_size=1,
1021 | output_channels=128,
1022 | stride=1,
1023 | name='conv_1x1',
1024 | use_bias=False,
1025 | need_activate=True
1026 | )
1027 | output_tensor = self._conv_block(
1028 | input_tensor=output_tensor,
1029 | k_size=1,
1030 | output_channels=self._class_nums,
1031 | stride=1,
1032 | name='final_conv',
1033 | use_bias=False,
1034 | need_activate=False
1035 | )
1036 | output_tensor = tf.image.resize_bilinear(
1037 | output_tensor,
1038 | output_tensor_size,
1039 | name='binary_logits'
1040 | )
1041 | return output_tensor
1042 |
1043 | def build_model(self, input_tensor, name, reuse=False):
1044 | """
1045 |
1046 | :param input_tensor:
1047 | :param name:
1048 | :param reuse:
1049 | :return:
1050 | """
1051 | with tf.variable_scope(name_or_scope=name, reuse=reuse):
1052 | # build detail branch
1053 | detail_branch_output = self.build_detail_branch(
1054 | input_tensor=input_tensor,
1055 | name='detail_branch'
1056 | )
1057 | # build semantic branch
1058 | semantic_branch_output, _ = self.build_semantic_branch(
1059 | input_tensor=input_tensor,
1060 | name='semantic_branch',
1061 | prepare_data_for_booster=False
1062 | )
1063 | # build aggregation branch
1064 | aggregation_branch_output = self.build_aggregation_branch(
1065 | detail_output=detail_branch_output,
1066 | semantic_output=semantic_branch_output,
1067 | name='aggregation_branch'
1068 | )
1069 | # build binary and instance segmentation branch
1070 | binary_seg_branch_output = self.build_binary_segmentation_branch(
1071 | input_tensor=aggregation_branch_output,
1072 | name='binary_segmentation_branch'
1073 | )
1074 | instance_seg_branch_output = self.build_instance_segmentation_branch(
1075 | input_tensor=aggregation_branch_output,
1076 | name='instance_segmentation_branch'
1077 | )
1078 | # gather frontend output result
1079 | self._net_intermediate_results['binary_segment_logits'] = {
1080 | 'data': binary_seg_branch_output,
1081 | 'shape': binary_seg_branch_output.get_shape().as_list()
1082 | }
1083 | self._net_intermediate_results['instance_segment_logits'] = {
1084 | 'data': instance_seg_branch_output,
1085 | 'shape': instance_seg_branch_output.get_shape().as_list()
1086 | }
1087 | return self._net_intermediate_results
1088 |
1089 |
1090 | if __name__ == '__main__':
1091 | """
1092 | test code
1093 | """
1094 | test_in_tensor = tf.placeholder(dtype=tf.float32, shape=[1, 256, 512, 3], name='input')
1095 | model = BiseNetV2(phase='train', cfg=parse_config_utils.lanenet_cfg)
1096 | ret = model.build_model(test_in_tensor, name='bisenetv2')
1097 | for layer_name, layer_info in ret.items():
1098 | print('layer name: {:s} shape: {}'.format(layer_name, layer_info['shape']))
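     | # With this 1x256x512x3 input, both heads upsample their 1/8-resolution
     | # features by 8x, so the expected shapes are [1, 256, 512, NUM_CLASSES]
     | # (NUM_CLASSES from the config) for the binary logits and
     | # [1, 256, 512, 128] for the instance logits.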
1099 |
1100 |
--------------------------------------------------------------------------------