-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathDilatedConvolution.py
128 lines (100 loc) · 7.17 KB
/
DilatedConvolution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import tensorflow as tf
class DilatedConvolution(object):
def __init__(self, dataset, input_tensor, w_pretrained, trainable):
self.dataset = dataset
self.input_tensor = input_tensor
self.w_pretrained = w_pretrained
self.trainable = trainable
self.softmax = self.dilated_convolution_by_pretrained()
pass
# 卷积
def _conv(self, name, input, strides=list([1, 1, 1, 1]), padding="VALID", add_bias=True, apply_relu=True, atrous_rate=None):
with tf.variable_scope(name):
w_kernel = tf.Variable(initial_value=self.w_pretrained[name + '/kernel:0'], trainable=self.trainable)
if atrous_rate is None:
conv_out = tf.nn.conv2d(input, w_kernel, strides, padding)
else:
# 首先,对于空洞卷积,将卷积核“膨胀”:
# 1.若进行VALID卷积,需要对输入padding atrous_rate-1个0,否则最外围的输入值就没有被卷积。
# 所以,如果对输入不进行padding,空洞卷积的输出应该减少:
# reduce_size = {[k_size * atrous_rate + (atrous_rate - 1)] - 1}
# 由于对输入padding了 2 * (atrous_rate - 1)个0,所以最终减少了 reduce_size - 2 * (atrous_rate - 1)
# 2.若进行SAME卷积,输入和输出的形状相同。
# 3.计算感受野:
# 1)相对于原始输入计算:
# Define the receptive field of an element p in F_i+1 as the set of elements in F_0
# that modify the value of F_i+1(p).
# 计算公式为:F_i+1(p) = 2 * F_i(p) - 1, F_0 = k_size
# 2)相对与前一个输入:
# 感受野大小为:“核的大小” - “最外围无效的区域”
# [k_size * atrous_rate + (atrous_rate - 1)] - 2 * (atrous_rate - 1)
# 图像语义分割需要获取“上下文信息”,
# 1.“不使用空洞卷积的传统卷积”通过“下采样(pooling)”来增大感受野(卷积核的大小不变,减少了数据量,
# 有利于防止过拟合,但是损失了分辨率,丢失了一些信息),以此获取上下文信息。
# 2.“空洞卷积”通过“膨胀卷积核”来增大感受野(参数个数没有增加,数据量没有减少,分辨率没有损失,
# 因此,计算量增大,内存消耗增大),以此获取上下文信息。
conv_out = tf.nn.atrous_conv2d(input, w_kernel, atrous_rate, padding)
if add_bias:
w_bias = tf.Variable(initial_value=self.w_pretrained[name + '/bias:0'], trainable=self.trainable)
conv_out = tf.nn.bias_add(conv_out, w_bias)
if apply_relu:
conv_out = tf.nn.relu(conv_out)
return conv_out
def dilated_convolution_by_pretrained(self):
# Check on dataset name
if self.dataset not in ['cityscapes', 'camvid']:
raise ValueError('Dataset "{}" not supported.'.format(self.dataset))
else:
h = self.input_tensor # [1396, 1396, 3]
h = self._conv('conv1_1', h) # [1394, 1394, 64]
h = self._conv('conv1_2', h) # [1392, 1392, 64]
h = tf.layers.max_pooling2d(h, pool_size=(2, 2), strides=(2, 2), padding='valid', name='pool1') # [696, 696, 64]
h = self._conv('conv2_1', h) # [694, 694, 128]
h = self._conv('conv2_2', h) # [692, 692, 128]
h = tf.layers.max_pooling2d(h, pool_size=(2, 2), strides=(2, 2), padding='valid', name='pool2') # [346, 346, 128]
h = self._conv('conv3_1', h) # [344, 344, 256]
h = self._conv('conv3_2', h) # [342, 342, 256]
h = self._conv('conv3_3', h) # [340, 340, 256]
h = tf.layers.max_pooling2d(h, pool_size=(2, 2), strides=(2, 2), padding='valid', name='pool3') # [170, 170, 256]
h = self._conv('conv4_1', h) # [168, 168, 512]
h = self._conv('conv4_2', h) # [166, 166, 512]
h = self._conv('conv4_3', h) # [164, 164, 512]
h = self._conv('conv5_1', h, atrous_rate=2) # [160, 160, 512]
h = self._conv('conv5_2', h, atrous_rate=2) # [156, 156, 512]
h = self._conv('conv5_3', h, atrous_rate=2) # [152, 152, 512]
h = self._conv('fc6', h, atrous_rate=4) # [128, 128, 4096] (k=7)
h = tf.layers.dropout(h, rate=0.5, name='drop6')
h = self._conv('fc7', h) # [128, 128, 4096]
h = tf.layers.dropout(h, rate=0.5, name='drop7')
h = self._conv('final', h) # [128, 128, 19]
# 上面属于Front-End
# 下面开始Context network architecture
h = tf.pad(h, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT', name='ctx_pad1_1') # [130, 130, 19]
h = self._conv('ctx_conv1_1', h) # [128, 128, 19] receptive field=[3, 3]
h = tf.pad(h, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT', name='ctx_pad1_2') # [130, 130, 19]
h = self._conv('ctx_conv1_2', h) # [128, 128, 19] rf=[5, 5]
h = tf.pad(h, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='CONSTANT', name='ctx_pad2_1') # [132, 132, 19]
h = self._conv('ctx_conv2_1', h, atrous_rate=2) # [128, 128, 19] rf=[9, 9]
h = tf.pad(h, [[0, 0], [4, 4], [4, 4], [0, 0]], mode='CONSTANT', name='ctx_pad3_1') # [136, 136, 19]
h = self._conv('ctx_conv3_1', h, atrous_rate=4) # [128, 128, 19] rf=[17, 17]
h = tf.pad(h, [[0, 0], [8, 8], [8, 8], [0, 0]], mode='CONSTANT', name='ctx_pad4_1') # [144, 144, 19]
h = self._conv('ctx_conv4_1', h, atrous_rate=8) # [128, 128, 19] rf=[33, 33]
h = tf.pad(h, [[0, 0], [16, 16], [16, 16], [0, 0]], mode='CONSTANT', name='ctx_pad5_1') # [160, 160, 19]
h = self._conv('ctx_conv5_1', h, atrous_rate=16) # [128, 128, 19] rf=[65, 65]
if self.dataset == 'cityscapes':
h = tf.pad(h, [[0, 0], [32, 32], [32, 32], [0, 0]], mode='CONSTANT', name='ctx_pad6_1') # [192, 192, 19]
h = self._conv('ctx_conv6_1', h, atrous_rate=32) # [128, 128, 19] rf=[129, 129]
h = tf.pad(h, [[0, 0], [64, 64], [64, 64], [0, 0]], mode='CONSTANT', name='ctx_pad7_1') # [256, 256, 19]
h = self._conv('ctx_conv7_1', h, atrous_rate=64) # [128, 128, 19] rf=[257, 257]
pass
h = tf.pad(h, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT', name='ctx_pad_fc1') # [130, 130, 19]
h = self._conv('ctx_fc1', h) # [128, 128, 19]
h = self._conv('ctx_final', h, padding='VALID', add_bias=True, apply_relu=False) # [128, 128, 19]
if self.dataset == 'cityscapes':
h = tf.image.resize_bilinear(h, size=(1024, 1024)) # [1024, 1024, 19]
logits = self._conv('ctx_upsample', h, padding='SAME', add_bias=False, apply_relu=True) # [1024, 1024, 19]
else:
logits = h # [128, 128, 19]
softmax = tf.nn.softmax(logits, dim=3, name='softmax')
return softmax
pass