-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
150 lines (124 loc) · 4.33 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import torch
import torch.nn as nn
import utils
from utils import model_info
# Backbone layout consumed by YOLO._create_conv_layers. Entry kinds:
#   tuple  -> one conv block: (out_channels, kernel_size, padding, stride)
#   'MP'   -> 2x2 max pooling with stride 2 (halves height and width)
#   list   -> [conv_a, conv_b, repeat]: the conv_a/conv_b pair, `repeat` times
# The shapes in the comments are (channels, height, width) for a
# (3, 448, 448) input; conv output size is floor((n - k + 2p) / s) + 1.
architecture_config = [
    (64, 7, 3, 2),      # floor((448 - 7 + 6) / 2) + 1 = 224 -> (64, 224, 224)
    'MP',               # 224 / 2 = 112 -> (64, 112, 112)
    (192, 3, 1, 1),     # (112 - 3 + 2) / 1 + 1 = 112 -> (192, 112, 112)
    'MP',               # -> (192, 56, 56)
    (128, 1, 0, 1),
    (256, 3, 1, 1),
    (256, 1, 0, 1),
    (512, 3, 1, 1),     # -> (512, 56, 56)
    'MP',               # -> (512, 28, 28)
    [(256, 1, 0, 1), (512, 3, 1, 1), 4],    # 1x1 reduce + 3x3 expand, 4 times
    (512, 1, 0, 1),
    (1024, 3, 1, 1),    # -> (1024, 28, 28)
    'MP',               # -> (1024, 14, 14)
    [(512, 1, 0, 1), (1024, 3, 1, 1), 2],   # 1x1 reduce + 3x3 expand, 2 times
    (1024, 3, 1, 1),
    (1024, 3, 1, 2),    # floor((14 - 3 + 2) / 2) + 1 = 7 -> (1024, 7, 7)
    (1024, 3, 1, 1),
    (512, 3, 1, 1),     # -> (512, 7, 7)
]
class ConvBlock(nn.Module):
    """
    Conv2d -> BatchNorm2d -> LeakyReLU(0.1) building block ("CBL").

    Note: the original YOLOv1 network does not use batch normalization;
    this block adds it after every convolution.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the convolution.
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (for example
            ``kernel_size``, ``padding``, ``stride``).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        # The convolution is created without a bias term; BatchNorm2d,
        # which follows it, has its own learnable shift.
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.af = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Apply convolution, batch normalization and the activation in order."""
        return self.af(self.bn(self.conv(x)))
class YOLO(nn.Module):
    """
    YOLOv1-style network (You Only Look Once): conv backbone + dense head.

    The backbone is built from the module-level ``architecture_config``;
    the head flattens the backbone output and maps it to
    ``S * S * (5 * B + C)`` values per sample.
    More detail about the network architecture in img/YOLOv1_backbone1.png

    Args:
        in_channels: number of channels of the input image (default 3).
        **kwargs: forwarded to ``_create_fc`` (``S``, ``B``, ``C``).
    """

    def __init__(self, in_channels=3, **kwargs):
        super().__init__()
        self.arch_conf = architecture_config
        self.in_channels = in_channels
        self.backbone = self._create_conv_layers(architecture_config)
        self.fc = self._create_fc(**kwargs)

    def forward(self, x):
        """Run the backbone and then the fully connected head on ``x``."""
        x = self.backbone(x)
        x = self.fc(x)
        return x

    def _create_conv_layers(self, arch):
        """
        Translate an architecture description into an ``nn.Sequential``.

        Args:
            arch: iterable of entries, each one of
                * tuple ``(out_channels, kernel_size, padding, stride)``:
                  a single ``ConvBlock``;
                * the string ``'MP'``: ``nn.MaxPool2d(kernel_size=2, stride=2)``;
                * list ``[conv_a, conv_b, repeat]``: the two conv tuples
                  stacked ``repeat`` times.

        Returns:
            ``nn.Sequential`` holding the layers in order. The first conv
            block takes ``self.in_channels`` input channels.

        Raises:
            ValueError: if an entry is none of the supported kinds. Such
                entries used to be skipped silently, which hid typos in the
                configuration and produced a different network.
        """
        def conv_block(in_ch, spec):
            # spec layout: (out_channels, kernel_size, padding, stride)
            return ConvBlock(
                in_ch, spec[0], kernel_size=spec[1],
                padding=spec[2], stride=spec[3]
            )

        blk = []
        in_channels = self.in_channels
        for index, layer in enumerate(arch):
            if isinstance(layer, tuple):
                blk.append(conv_block(in_channels, layer))
                in_channels = layer[0]
            elif isinstance(layer, str) and layer == 'MP':
                # Pooling keeps the channel count, so in_channels is unchanged.
                blk.append(nn.MaxPool2d(kernel_size=2, stride=2))
            elif isinstance(layer, list):
                conv_pair = (layer[0], layer[1])
                num_repeat = layer[2]
                for _ in range(num_repeat):
                    # Each conv feeds the next one, so the second conv of the
                    # pair takes the first conv's out_channels as its input.
                    for spec in conv_pair:
                        blk.append(conv_block(in_channels, spec))
                        in_channels = spec[0]
            else:
                raise ValueError(
                    "unsupported architecture entry at index {}: {!r}".format(
                        index, layer
                    )
                )
        return nn.Sequential(*blk)

    def _create_fc(self, S=7, B=2, C=20):
        """
        Build the fully connected head.

        Args:
            S: S*S=num_grid_cell
            B: num_bounding_box
            C: num_class

        Returns:
            ``nn.Sequential``: Flatten -> Linear -> Dropout(0.6) ->
            LeakyReLU(0.1) -> Linear with ``S * S * (5 * B + C)`` outputs.
        """
        return nn.Sequential(
            nn.Flatten(),
            # Input size is fixed at 512 * 7 * 7: 512 is the out_channels of
            # the last architecture_config entry, and the config's comments
            # derive a 7x7 feature map for a 448x448 input. This 7 is not
            # tied to S.
            nn.Linear(512 * 7 * 7, 2048),
            nn.Dropout(0.6),
            nn.LeakyReLU(0.1),
            nn.Linear(2048, S * S * (5 * B + C))
        )

    def model_detail(self):
        """Print the backbone and the head modules."""
        print(self.backbone)
        print(self.fc)
def test_model_forward():
    """
    Smoke test: push one random (1, 3, 448, 448) batch through the network.

    Builds ``YOLO(S=7, B=2, C=1)``, reports it via ``model_info`` and then
    applies every backbone layer followed by every head layer one at a time,
    printing the class name and output shape after each step.
    """
    net = YOLO(S=7, B=2, C=1)
    model_info(net)
    tensor = torch.rand(size=(1, 3, 448, 448))
    # Backbone first, then the fully connected head.
    for stage in (net.backbone, net.fc):
        for module in stage:
            tensor = module(tensor)
            print(module.__class__.__name__, 'output shape:\t', tensor.shape)
# Run the layer-by-layer shape smoke test only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == '__main__':
    test_model_forward()