深度学习模型之CNN(十)使用pytorch搭建ResNet并基于迁移学习训练

工程目录

1
2
3
4
5
6
7
8
├── Test5_resnet
├── model.py(模型文件)
├── train.py(调用模型训练,自动生成class_indices.json,resNet34.pth)
├── predict.py(调用模型进行预测)
├── tulip.jpg(用来根据前期的训练结果来predict图片类型)
├── resnet34-pre.pth(用于迁移学习时,提前下载好的官方ResNet34预训练权重文件)
└── data_set
└── data数据集

原论文中分别对应18、34、50、101、152层的网络结构参数一览表

原论文中的参数列表

model.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import torch.nn as nn
import torch


class BasicBlock(nn.Module):
    """Two-convolution residual block (the ResNet-18/34 style block).

    The main branch is conv3x3 -> BN -> ReLU -> conv3x3 -> BN; its output is
    added to the shortcut branch and passed through a final ReLU.

    Args:
        in_channel: number of channels of the input feature map.
        out_channel: number of channels produced by both convolutions.
        stride: stride of the first convolution (2 halves height/width).
        downsample: optional module applied to the input on the shortcut
            branch so that it matches the main branch's output shape.
        groups: accepted because ``ResNet._make_layer`` passes it to every
            block type; only the default ``1`` is supported here.
        width_per_group: accepted for the same reason; only the default
            ``64`` is supported here.

    Raises:
        ValueError: if ``groups`` or ``width_per_group`` is not the default,
            since this block has no grouped convolution to apply them to.
    """

    # Ratio between the block's output channels and ``out_channel``.
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super(BasicBlock, self).__init__()
        if groups != 1 or width_per_group != 64:
            raise ValueError(
                "BasicBlock only supports groups=1 and width_per_group=64")
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        # The second ReLU is applied only after the shortcut has been added.
        out += identity
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    """Three-convolution bottleneck residual block (ResNet-50/101 and ResNeXt).

    The main branch is conv1x1 -> BN -> ReLU -> conv3x3 -> BN -> ReLU ->
    conv1x1 -> BN; its output is added to the shortcut branch and passed
    through a final ReLU. The stride is applied on the 3x3 convolution.

    Args:
        in_channel: number of channels of the input feature map.
        out_channel: base width of the block; the block outputs
            ``out_channel * expansion`` channels.
        stride: stride of the 3x3 convolution (2 halves height/width).
        downsample: optional module applied to the input on the shortcut
            branch so that it matches the main branch's output shape.
        groups: number of groups of the 3x3 convolution (1 = plain ResNet).
        width_per_group: channels per group at the 64-channel stage; with the
            defaults the inner width equals ``out_channel``.
    """

    # The last 1x1 convolution outputs ``expansion`` times ``out_channel``.
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super(Bottleneck, self).__init__()

        # Inner width of the first two convolutions. With groups=1 and
        # width_per_group=64 this is exactly out_channel, so the default
        # layer shapes are unchanged.
        width = int(out_channel * (width_per_group / 64.)) * groups

        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        # -----------------------------------------
        self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, groups=groups,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(width)
        # -----------------------------------------
        self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel * self.expansion,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        # The final ReLU is applied only after the shortcut has been added.
        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    """ResNet / ResNeXt backbone assembled from a configurable residual block.

    Args:
        block: residual block class; it must expose an ``expansion`` attribute
            and accept ``(in_channel, out_channel, stride=, downsample=,
            groups=, width_per_group=)``.
        blocks_num: number of residual blocks in each of the four stages.
        num_classes: output size of the final fully connected layer.
        include_top: when False, the average pool and fully connected layer
            are not created and ``forward`` returns the stage-4 feature map.
        groups: forwarded to every block.
        width_per_group: forwarded to every block.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, groups=1, width_per_group=64):
        super().__init__()
        self.include_top = include_top
        # Channel count entering the next stage; updated by _make_layer.
        self.in_channel = 64

        self.groups = groups
        self.width_per_group = width_per_group

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages registered as layer1..layer4, in that order.
        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
        for index, (stage_channel, stage_stride) in enumerate(stage_plan):
            stage = self._make_layer(block, stage_channel, blocks_num[index],
                                     stride=stage_stride)
            setattr(self, "layer{}".format(index + 1), stage)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialise every convolution with Kaiming-normal weights.
        for module in self.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` residual blocks.

        Only the first block receives ``stride`` and, when the shortcut shape
        would not match the main branch, a 1x1-conv + BN downsample module.
        """
        stage_width = channel * block.expansion
        block_kwargs = dict(groups=self.groups, width_per_group=self.width_per_group)

        shortcut = None
        if not (stride == 1 and self.in_channel == stage_width):
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channel, stage_width, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(stage_width))

        stage = [block(self.in_channel, channel, downsample=shortcut, stride=stride,
                       **block_kwargs)]
        self.in_channel = stage_width

        stage.extend(block(self.in_channel, channel, **block_kwargs)
                     for _ in range(1, block_num))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Run the stem and four stages, then the classifier if ``include_top``."""
        stem = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        features = self.layer4(self.layer3(self.layer2(self.layer1(stem))))

        if not self.include_top:
            return features

        pooled = torch.flatten(self.avgpool(features), 1)
        return self.fc(pooled)


def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet from ``BasicBlock`` with stage depths [3, 4, 6, 3].

    Pretrained weights: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths,
                  num_classes=num_classes, include_top=include_top)


def resnet50(num_classes=1000, include_top=True):
    """Build a ResNet from ``Bottleneck`` with stage depths [3, 4, 6, 3].

    Pretrained weights: https://download.pytorch.org/models/resnet50-19c8e357.pth
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes, include_top=include_top)


def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet from ``Bottleneck`` with stage depths [3, 4, 23, 3].

    Pretrained weights: https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    """
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes, include_top=include_top)


def resnext50_32x4d(num_classes=1000, include_top=True):
    """Build a ResNet from ``Bottleneck`` with depths [3, 4, 6, 3], groups=32, width_per_group=4.

    Pretrained weights: https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
    """
    return ResNet(
        Bottleneck,
        [3, 4, 6, 3],
        num_classes=num_classes,
        include_top=include_top,
        groups=32,
        width_per_group=4,
    )


def resnext101_32x8d(num_classes=1000, include_top=True):
    """Build a ResNet from ``Bottleneck`` with depths [3, 4, 23, 3], groups=32, width_per_group=8.

    Pretrained weights: https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth
    """
    return ResNet(
        Bottleneck,
        [3, 4, 23, 3],
        num_classes=num_classes,
        include_top=include_top,
        groups=32,
        width_per_group=8,
    )

对应18、34层的残差结构

18和34层的residual结构18和34层

首先定义一个类BasicBlock,对应着18层和34层所对应的残差结构,继承来自于nn.Module父类。包含实线与虚线残差结构的功能,依靠初始化函数中downsample参数进行分辨

1
2
3
4
5
6
7
8
class BasicBlock(nn.Module):
# ......
def __init__(self, in_channel, out_channel, stride=1, downsample=None):
# ......

def forward(self, x):
# ......
return out

expansion参数对应着残差结构中,主分支所采用的卷积核的个数是否发生变化。例如图上(左)显示,其输入特征矩阵和输出特征矩阵的shape是一致的,因此由expansion = 1来表示卷积核的个数并没有发生变化,也就是1倍。

在之后搭建第50、101、152层的残差结构时,会发现输出特征矩阵的深度是输入特征矩阵的4倍,也就是说,残差结构中,第三层的卷积核个数是第一、二层的四倍,因此expansion = 4。

1
expansion = 1

定义初始函数

下采样参数downsample默认为none,所对应着虚线残差结构中的shortcut的1 x 1的卷积层。作用是对上一层的输出进行维度上的缩放,保证shortcut和本层的输出能够在同一维度上合并

1
2
3
def __init__(self, in_channel, out_channel, stride=1, downsample=None):
super(BasicBlock, self).__init__()

conv1–out_channels

  • 当stride = 1时,对应的是实线残差结构:output_size = (input_size - 3 + 2 * 1 )/ 1 + 1 = input_size( shape保持不变)
  • 当stride = 2时,对应的是虚线残差结构:output_size = (input_size - 3 + 2 * 1)/ 2 + 1 = input_size / 2 + 0.5 = input_size / 2(向下取整)

conv1–bias

  • bias=False,代表不使用bias参数,在上堂课中说明,使用Batch Normalization时,使用或者不使用bias的效果是一样的

bn1–out_channel

  • Batch Normalization:所输入的参数对应着输入特征矩阵的深度,也就是卷积层1输出特征矩阵的深度,即out_channel
1
2
3
4
5
6
7
8
self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channel)
self.relu = nn.ReLU()
self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channel)
self.downsample = downsample

正向传播函数

  • identity = x:将x赋值给identity,也就是shortcut上的输出值
  • 对下采样函数downsample进行判断,如果是None(没有输入下采样函数),则表示shortcut是实线,则可以跳过这部分,反之将输入特征矩阵x输入下采样函数downsample,得到shortcut函数的输出
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
def forward(self, x):
# Shortcut branch: starts as the unmodified input.
identity = x
# When a downsample module was supplied, the shortcut goes through it instead.
if self.downsample is not None:
identity = self.downsample(x)

# Main branch, first convolution: conv -> BN -> ReLU.
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)

# Main branch, second convolution: conv -> BN (no ReLU before the addition).
out = self.conv2(out)
out = self.bn2(out)

# Add the shortcut, then apply the final ReLU.
out += identity
out = self.relu(out)

return out

对应50、101、152层残差结构

residual结构(50、101、152层)

注意:原论文中,在虚线残差结构的主分支上,第一个1x1卷积层的步距是2,第二个3x3卷积层步距是1。但在pytorch官方实现过程中是第一个1x1卷积层的步距是1,第二个3x3卷积层步距是2,这么做的好处是能够在top1上提升大概0.5%的准确率。可参考Resnet v1.5

1
2
3
4
5
6
7
8
class Bottleneck(nn.Module):
# ......
def __init__(self, in_channel, out_channel, stride=1, downsample=None):
# ......

def forward(self, x):
# ......
return out

在搭建第50、101、152层的残差结构时,会发现输出特征矩阵的深度是输入特征矩阵的4倍,也就是说,残差结构中,第三层的卷积核个数是第一、二层的四倍,因此expansion = 4。

1
expansion = 4

定义初始函数

1
2
def __init__(self, in_channel, out_channel, stride=1, downsample=None):
super(Bottleneck, self).__init__()

conv1:output_size = (input_size - 1 + 2 * 0 )/ 1 + 1 = input_size( shape保持不变)

1
2
3
self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
kernel_size=1, stride=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channel)

conv2:

  • 实线:stride默认=1,output_size = (input_size - 3 + 2 * 1 )/ 1 + 1 = input_size ( shape保持不变)
  • 虚线:stride = 2,由参数传入,output_size = (input_size - 3 + 2 * 1 )/ 2 + 1 = input_size / 2 + 0.5 = input_size / 2(向下取整)
1
2
3
self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel, 
kernel_size=3, stride=stride, bias=False, padding=1)
self.bn2 = nn.BatchNorm2d(out_channel)

conv3:

  • output_size = (input_size - 1 + 2 * 0 )/ 1 + 1 = input_size (高宽不变)
  • out_channels=out_channel * self.expansion:表示深度变为上一层输出特征矩阵深度的4倍(self.expansion = 4)
1
2
3
4
5
6
self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel * self.expansion,
kernel_size=1, stride=1, bias=False)
# bn3 normalizes conv3's output, so its channel count is the value passed as
# conv3's out_channels, i.e. out_channel * self.expansion (not out_channel).
self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample

正向传播函数

identity = x:将x赋值给identity,也就是shortcut上的输出值

对下采样函数downsample进行判断,如果是None(没有输入下采样函数),则表示shortcut是实线,则可以跳过这部分,反之则对应虚线的残差结构,将输入特征矩阵x输入下采样函数downsample,得到shortcut函数的输出

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
def forward(self, x):
# Shortcut branch: starts as the unmodified input.
identity = x
# When a downsample module was supplied, the shortcut goes through it instead.
if self.downsample is not None:
identity = self.downsample(x)

# First convolution: conv -> BN -> ReLU.
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)

# Second convolution: conv -> BN -> ReLU.
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)

# Third convolution: conv -> BN (no ReLU before the addition).
out = self.conv3(out)
out = self.bn3(out)

# Add the shortcut, then apply the final ReLU.
out += identity
out = self.relu(out)

return out

ResNet网络框架

在初始化函数当中,传入的block就是对应的残差结构,会根据定义的层结构传入不同的block,例如传入是18、34层的残差结构,那么就是BasicBlock,如果传入的是50、101、152层的残差结构,那么就是Bottleneck;

blocks_num:传入的是一个列表类型,对应的是使用残差结构的数目。例如对应使用34层的残差结构,那么根据参数表来看,blocks_num = [ 3, 4, 6, 3 ];对于101层就是[ 3, 4, 23, 3 ]

include_top:是为了方便以后能在ResNet网络基础上搭建更加复杂的网络,本节课并没有使用到,但代码中实现了该方法

1
2
3
4
5
6
7
8
9
10
11
12
class ResNet(nn.Module):

def __init__(self, block, blocks_num, num_classes=1000, include_top=True, groups=1, width_per_group=64):
# ......

def _make_layer(self, block, channel, block_num, stride=1):
# ......
return nn.Sequential(*layers)

def forward(self, x):
# ......
return x

定义初始函数

1
2
3
def __init__(self, block, blocks_num, num_classes=1000, include_top=True ):
super(ResNet, self).__init__()
self.include_top = include_top

根据参数表,无论在哪一个层结构下,在经过Maxpooling下采样层之后,输出特征矩阵的深度都为64

1
self.in_channel = 64

conv1:输入是RGB彩色图像,因此输入通道数为3。对应参数表中7x7的卷积层,输出特征矩阵的深度为64(即self.in_channel)。为了使特征矩阵的高和宽缩减为原来的一半,取kernel_size = 7,padding = 3,stride = 2。

output_size = ( input_size - 7 + 2 * 3 )/ 2 + 1 = input_size / 2 + 0.5 = input_size / 2(向下取整)

1
2
3
4
self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
padding=3, bias=False)
self.bn1 = nn.BatchNorm2d(self.in_channel)
self.relu = nn.ReLU(inplace=True)

maxpool:kernel_size = 3,池化不改变特征矩阵的深度(仍为64);为了使特征矩阵的高和宽缩减为原来的一半,取padding = 1,stride = 2

output_size = ( input_size - 3 + 2 * 1 )/ 2 + 1 = input_size / 2 + 0.5 = input_size / 2(向下取整)

1
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

接下来定义layer1、layer2、layer3、layer4

其中layer1对应的是参数表中conv2所对应的一系列残差结构;layer2对应的是conv3所对应的一系列残差结构;layer3对应的是conv4;layer4对应的是conv5。这一系列layer是通过_make_layer函数生成的

1
2
3
4
self.layer1 = self._make_layer(block, 64, blocks_num[0])
self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)

在输入时已经将include_top默认为True,通过自适应的平均池化下采样操作AdaptiveAvgPool2d,无论输入特征矩阵的高和宽是多少,都会以(1, 1)的形式,输出高和宽为1的特征矩阵。

再通过全连接层,也就是输出节点层,通过nn.Linear类进行定义。输入的节点个数,也就是通过平均池化下采样层之后所得到特征矩阵展平后的节点个数。由于通过平均池化下采样之后得到的特征矩阵的高和宽都是1,那么展平后的节点个数即特征矩阵的深度。

对于18、34层而言,通过conv5.x经过一系列残差结构输出的特征矩阵的深度为512,所以输入数据为512 * block.expansion,block.expansion = 1;

对于50、101、152层来说,通过conv5.x经过一系列残差结构输出的特征矩阵的深度为2048,也就是512的4倍,正好此时block.expansion = 4。

1
2
3
if self.include_top:
self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # output size = (1, 1)
self.fc = nn.Linear(512 * block.expansion, num_classes)

最后对卷积层进行初始化操作

1
2
3
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

_make_layer函数

block:对应的是BasicBlock(18、34层)或者Bottleneck(50、101、152层)

channel:对应的是残差结构中卷积层所使用卷积核的个数,例如layer1对应的是conv2.1中卷积核的个数64,layer2对应的是conv3.1卷积核的个数128,layer3对应conv4.1卷积核个数是256,layer4对应conv5.1卷积核个数是512

block_num:表示该层一共包含多少个残差结构,例如在34层残差结构当中,conv2.x中一共包含3个,conv3.x包含4个,conv4.x包含6个,conv5.x包含3个

1
2
3
4
def _make_layer(self, block, channel, block_num, stride=1):
downsample = None
# ......
return nn.Sequential(*layers)

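该判断语句决定是否需要生成下采样函数downsample(即虚线残差结构的shortcut分支):只要stride不为1,或者输入深度self.in_channel与输出深度channel * block.expansion不相等,就会生成。以layer1为例:18、34层的网络结构会跳过该判断下的语句,50、101、152层的网络结构则会执行,即生成下采样函数downsample;而layer2、layer3、layer4由于传入了stride = 2,无论哪种网络结构都会生成downsample。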

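在layer1中,因没有输入stride,所以默认stride = 1,因此判断语句前半段不成立;后半段判断self.in_channel是否等于channel * block.expansion。对于18、34层的残差结构,block.expansion = 1,而layer1传入的channel为64,二者相等,整个条件不成立,不会生成downsample。对于50、101、152层的残差结构,block.expansion = 4,channel * block.expansion = 256,与self.in_channel = 64不相等,条件成立,因此会生成downsample(此时只调整深度,不改变高和宽)。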

1
2
3
4
if stride != 1 or self.in_channel != channel * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(channel * block.expansion))

首先定义layers列表,block对应的是BasicBlock(18、34层)或者Bottleneck(50、101、152层)。这一步是将每个layer中的第一个残差结构(即conv2.1、conv3.1、conv4.1、conv5.1,需要下采样时为虚线残差结构)实例化后存入layers列表;以layer1为例,存放进去的是conv2.1。随后将self.in_channel更新为channel * block.expansion,作为后续残差结构的输入深度。

1
2
3
4
5
6
layers = []
layers.append(block(self.in_channel,
channel,
downsample=downsample,
stride=stride))
self.in_channel = channel * block.expansion

以循环的方式将实线残差结构依次存放进layers列表中。range(1, block_num)中从1开始,因为上面的步骤已经将第0个残差结构放入列表了。

1
2
3
for _ in range(1, block_num):
layers.append(block(self.in_channel, channel))
return nn.Sequential(*layers)

结合以上对layer1的过程描述,_make_layer函数实际上是将conv2.x、conv3.x、conv4.x、conv5.x每一层中虚线和实线对应的残差结构依次存放进layers列表中,再通过nn.Sequential组合成一个layer。例如50层的conv2.x,layer1对应是[ 虚线,实线,实线 ]。

正向传播函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def forward(self, x):
# conv1: stem convolution -> BN -> ReLU, followed by the max pool
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)

# conv2
x = self.layer1(x)
# conv3
x = self.layer2(x)
# conv4
x = self.layer3(x)
# conv5
x = self.layer4(x)

# Classifier head; skipped when include_top is False.
if self.include_top:
x = self.avgpool(x)
# flatten everything after the batch dimension
x = torch.flatten(x, 1)
# fully connected layer
x = self.fc(x)

return x

定义ResNet网络架构的函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# 18 layers
def resnet18(num_classes=1000, include_top=True):
return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes, include_top=include_top)
# 34 layers
def resnet34(num_classes=1000, include_top=True):
# https://download.pytorch.org/models/resnet34-333f7ec4.pth
return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)
# 50 layers
def resnet50(num_classes=1000, include_top=True):
# https://download.pytorch.org/models/resnet50-19c8e357.pth
return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)
# 101 layers
def resnet101(num_classes=1000, include_top=True):
# https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)
# 152 layers
def resnet152(num_classes=1000, include_top=True):
# NOTE(review): this comment used to point at the resnet101 checkpoint, which does not
# match [3, 8, 36, 3]; the resnet152 checkpoint is presumably
# https://download.pytorch.org/models/resnet152-b121ed2d.pth — confirm before use.
return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, include_top=include_top)

train.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import os
import sys
import json

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from tqdm import tqdm

from model import resnet34


def main():
    """Fine-tune a pretrained ResNet-34 on an image-folder dataset.

    Loads ``./resnet34-pre.pth``, replaces the fully connected layer with one
    sized to the number of classes found in the training folder, trains for a
    few epochs and keeps the weights with the best validation accuracy.

    Side effects: writes ``class_indices.json`` (index -> class name) and
    ``./resNet34.pth`` into the current working directory.

    Raises:
        AssertionError: if the dataset directory or the pretrained weight
            file does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), ".."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = {val: key for key, val in flower_list.items()}
    # The classifier is sized from the dataset instead of a hard-coded 5.
    num_classes = len(flower_list)
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 16
    # os.cpu_count() may return None, which min() cannot compare with ints.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    # Build with the default 1000-way head so the pretrained weights load as-is.
    net = resnet34()
    # load pretrain weights
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    model_weight_path = "./resnet34-pre.pth"
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    net.load_state_dict(torch.load(model_weight_path, map_location='cpu'))

    # change fc layer structure
    in_channel = net.fc.in_features
    net.fc = nn.Linear(in_channel, num_classes)
    net.to(device)

    # define loss function
    loss_function = nn.CrossEntropyLoss()

    # construct an optimizer
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)

    epochs = 3
    best_acc = 0.0
    save_path = './resNet34.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics (read the scalar once per step)
            loss_value = loss.item()
            running_loss += loss_value

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss_value)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1,
                                                           epochs)

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Only the best-scoring epoch is kept on disk.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()

训练结果(迁移学习),准确率最终能达到93%

train迁移学习训练结果

如果不想使用迁移学习的方法,可以将以下代码注释

1
2
3
4
5
6
7
8
9
model_weight_path = "./resnet34-pre.pth"
assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
net.load_state_dict(torch.load(model_weight_path, map_location='cpu'))
# for param in net.parameters():
# param.requires_grad = False

# change fc layer structure
in_channel = net.fc.in_features
net.fc = nn.Linear(in_channel, 5)

并在实例化网络时传入类别个数,即将net = resnet34()改为net = resnet34(num_classes=5)

predict.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import resnet34


def main():
    """Classify ``./tulip.jpg`` with the fine-tuned ResNet-34 and show the result.

    Reads the index -> class-name mapping from ``./class_indices.json`` and the
    weights from ``./resNet34.pth``, prints the probability of every class and
    displays the image titled with the top prediction.

    Raises:
        AssertionError: if the image, the class-index file or the weight file
            does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "./tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force 3 channels: grayscale or RGBA files would otherwise not match the
    # 3-channel Normalize and the network's 3-channel first convolution.
    img = Image.open(img_path).convert("RGB")
    plt.imshow(img)
    # [C, H, W]
    img = data_transform(img)
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model; the head size follows the class-index file instead of a
    # hard-coded 5.
    model = resnet34(num_classes=len(class_indict)).to(device)

    # load model weights
    weights_path = "./resNet34.pth"
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # prediction
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).item()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].item())
    plt.title(print_res)
    for i, prob in enumerate(predict):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  prob.item()))
    plt.show()


if __name__ == '__main__':
    main()

预测结果(迁移学习)

predict迁移学习预测结果