(Updated) ResNet-18 for CIFAR-10 (94%) and CIFAR-100 (75%) Classification!
- This article is part of the TensorFlow 2.0 study-notes series (column: TensorFlow2.0学习笔记). The series is updated continuously; likes, bookmarks, and shares are much appreciated!
- Full table of contents for the series: TensorFlow2.0学习笔记总目录!
Table of contents
- I. ResNet-18 for CIFAR-100 image classification!
- 1.0. Utility module: utils.py
- 1.1. Method 1: TensorFlow 2.0 code
- 1.2. Method 1: TensorFlow 2.0 results
- 1.3. Method 2: TensorFlow 2.0 code
- 1.4. Method 2: TensorFlow 2.0 results
- II. ResNet-18 for CIFAR-10 image classification!
- 2.1. Method 1: TensorFlow 2.0 code
- 2.2. Method 1: TensorFlow 2.0 results
- 2.3. Method 2: TensorFlow 2.0 code
- 2.4. Method 2: TensorFlow 2.0 results
- III. ResNet-18 for CIFAR-100 image classification (with attention)!
- 3.1. resnet.py
- 3.2. res_my_soft.py
- 3.3. Results with the attention mechanism
- IV. Contact me privately if you need tutoring!
- For an introduction to the ResNet basics, see the linked article: ResNet-18和ResNet-34实现Cifar10和Cifar100图像分类!
- This article is implemented with TensorFlow 2.0 (stars are welcome; the project will keep being updated). Code repository: zhangkaifang/ResNet
I. ResNet-18 for CIFAR-100 image classification!
1.0. Utility module: utils.py
import numpy as np

# 1. Compute the per-channel (RGB) mean and standard deviation of an image dataset.
def compute_mean_var(image):
    # image.shape: [image_num, w, h, c]
    mean = []
    var = []
    for c in range(image.shape[-1]):
        mean.append(np.mean(image[:, :, :, c]))
        var.append(np.std(image[:, :, :, c]))
    return mean, var

# 2. Normalize the images channel by channel.
def norm_images(image):
    # image.shape: [image_num, w, h, c]
    image = image.astype('float32')
    mean, var = compute_mean_var(image)
    image[:, :, :, 0] = (image[:, :, :, 0] - mean[0]) / var[0]
    image[:, :, :, 1] = (image[:, :, :, 1] - mean[1]) / var[1]
    image[:, :, :, 2] = (image[:, :, :, 2] - mean[2]) / var[2]
    return image

def normalize(x, mean, std):
    # x shape: [224, 224, 3]
    # mean/std have shape [3]; broadcasting aligns shapes from the right:
    # mean: [3] -> [1, 1, 3] (insert leading 1s) -> [224, 224, 3] (tile to match x)
    x = (x - mean) / std  # this single line is equivalent to the three per-channel lines in norm_images above
    return x

# 3. Learning-rate schedule used for the 200-epoch (and longer) training runs.
def lr_schedule_200ep(epoch):
    if epoch < 60:
        return 0.1
    if epoch < 120:
        return 0.02
    if epoch < 160:
        return 0.004
    if epoch < 200:
        return 0.0008
    if epoch < 250:
        return 0.0003
    if epoch < 300:
        return 0.0001
    else:
        return 0.00006

# 4. Learning-rate schedule for roughly 300-epoch training runs.
def lr_schedule_300ep(epoch):
    if epoch < 150:
        return 0.1
    if epoch < 225:
        return 0.01
    if epoch < 300:
        return 0.001
    return 0.001  # keep the final rate once past 300 epochs
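As a quick, optional sanity check (my own sketch, assuming the functions above are saved as utils.py), the per-channel loop in norm_images and the broadcasting one-liner in normalize should give the same result on any batch:

import numpy as np
from utils import compute_mean_var, norm_images, normalize

# Fake batch of 8 RGB images in [0, 1]; shapes follow [image_num, w, h, c].
batch = np.random.rand(8, 32, 32, 3).astype('float32')
mean, std = compute_mean_var(batch)

a = norm_images(batch.copy())                          # per-channel loop version
b = normalize(batch, np.array(mean), np.array(std))    # broadcasting version
print(np.allclose(a, b, atol=1e-5))                    # expected: True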
1.1. Method 1: TensorFlow 2.0 code
- resnet.py
import tensorflow as tf
from tensorflow.python.keras.api._v2.keras import layers, Sequential, regularizers
import tensorflow.keras as keras

"""
======================================================================
Version 2, 2020-08-19, @zhangkaifang: closer to the original ResNet-18
======================================================================
"""

################################################################################################
# Helper that builds a padded convolution (3x3 or 1x1).
def regularized_padded_conv(*args, **kwargs):
    '''
    kernel_initializer='glorot_normal': Glorot (Xavier) normal initialization; weights are drawn
        from a zero-mean normal distribution with std sqrt(2 / (fan_in + fan_out)), where fan_in
        and fan_out are the numbers of input and output units of the weight tensor.
    kernel_initializer='he_normal': He normal initialization; weights are drawn from a zero-mean
        normal distribution with std sqrt(2 / fan_in). An L2 penalty could also be added here via
        kernel_regularizer=regularizers.l2(5e-4).
    '''
    return layers.Conv2D(*args, **kwargs, padding='same',
                         use_bias=False, kernel_initializer='glorot_normal')

# 1. Basic Block, used by ResNet-18 and ResNet-34.
class BasicBlock(layers.Layer):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        # 1. First convolution of the BasicBlock; it may downsample via `stride`.
        self.conv1 = regularized_padded_conv(out_channels, kernel_size=3, strides=stride)
        self.bn1 = layers.BatchNormalization()
        # 2. Second convolution; only the first conv downsamples, so the stride is fixed to 1 here
        #    and the spatial size stays unchanged.
        self.conv2 = regularized_padded_conv(out_channels, kernel_size=3, strides=1)
        self.bn2 = layers.BatchNormalization()
        # 3. If the block downsamples (stride != 1) or changes the channel count, project the
        #    shortcut with a 1x1 convolution; otherwise use the identity.
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = Sequential([regularized_padded_conv(self.expansion * out_channels,
                                                                kernel_size=1, strides=stride),
                                        layers.BatchNormalization()])
        else:
            self.shortcut = lambda x, _: x

    def call(self, inputs, training=False):
        # if training: print("=> training network ... ")
        out = self.conv1(inputs)
        out = self.bn1(out, training=training)
        out = tf.nn.relu(out)
        out = self.conv2(out)
        out = self.bn2(out, training=training)
        out = out + self.shortcut(inputs, training)
        out = tf.nn.relu(out)
        return out

################################################################################################
# 2. Bottleneck block, used by ResNet-50, ResNet-101 and ResNet-152.
class Bottleneck(keras.Model):
    expansion = 4

    def __init__(self, in_channels, out_channels, strides=1):
        super(Bottleneck, self).__init__()
        self.conv1 = layers.Conv2D(out_channels, 1, 1, use_bias=False)
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.Conv2D(out_channels, 3, strides, padding="same", use_bias=False)
        self.bn2 = layers.BatchNormalization()
        self.conv3 = layers.Conv2D(out_channels * self.expansion, 1, 1, use_bias=False)
        self.bn3 = layers.BatchNormalization()

        if strides != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = Sequential([layers.Conv2D(self.expansion * out_channels, kernel_size=1,
                                                      strides=strides, use_bias=False),
                                        layers.BatchNormalization()])
        else:
            self.shortcut = lambda x, _: x

    def call(self, x, training=False):
        out = tf.nn.relu(self.bn1(self.conv1(x), training))
        out = tf.nn.relu(self.bn2(self.conv2(out), training))
        out = self.bn3(self.conv3(out), training)
        out = out + self.shortcut(x, training)
        out = tf.nn.relu(out)
        return out

################################################################################################
# 3. The ResNet itself; inheriting from keras.Model (or keras.layers.Layer) both work.
class ResNet(keras.Model):
    # layer_dims, e.g. [2, 2, 2, 2]: 4 residual stages, each containing 2 BasicBlocks.
    # num_classes: size of the final fully connected output, i.e. the number of classes.
    def __init__(self, blocks, layer_dims, num_classes=100):
        super(ResNet, self).__init__()
        self.in_channels = 64

        # 0. Stem convolution; a MaxPool2D could optionally be added here, but it is omitted.
        #    Its channel count must match layer1, otherwise the residual add would fail.
        self.stem = Sequential([regularized_padded_conv(64, kernel_size=3, strides=1),
                                layers.BatchNormalization()])

        # 1. Build the 4 residual stages; the channel widths (64/128/256/512) are empirical values
        #    and do not have to double at every stage.
        self.layer1 = self.build_resblock(blocks, 64, layer_dims[0], stride=1)
        self.layer2 = self.build_resblock(blocks, 128, layer_dims[1], stride=2)
        self.layer3 = self.build_resblock(blocks, 256, layer_dims[2], stride=2)
        self.layer4 = self.build_resblock(blocks, 512, layer_dims[3], stride=2)
        # self.final_bn = layers.BatchNormalization()
        # self.avgpool = layers.GlobalAveragePooling2D()
        # self.fc = layers.Dense(num_classes)

    # 2. Build one residual stage: the first block may downsample, the rest use stride 1.
    def build_resblock(self, blocks, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)  # e.g. stride=2, num_blocks=2 -> [2, 1]
        res_blocks = Sequential()
        for stride in strides:
            res_blocks.add(blocks(self.in_channels, out_channels, stride))
            self.in_channels = out_channels  # note: Bottleneck stages would need out_channels * blocks.expansion here
        return res_blocks

    def call(self, inputs, training=False):
        # The layers were created in __init__; here is the forward pass.
        out = self.stem(inputs, training)
        out = tf.nn.relu(out)

        out = self.layer1(out, training=training)
        out = self.layer2(out, training=training)
        out = self.layer3(out, training=training)
        out = self.layer4(out, training=training)
        # out = self.final_bn(out, training=training)
        # out = tf.nn.relu(out)
        # A global average pooling would reduce the feature map to shape [batchsize, channel],
        # so no reshape would be needed before the classifier:
        # x = self.avgpool(x)
        # # [b, 100]
        # x = self.fc(x)
        return out

################################################################################################
""" ResNet-18 """
def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])

""" ResNet-34: only the stage configuration changes, still 4 residual stages. """
def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])

""" ResNet-50 """
def ResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])

""" ResNet-101 """
def ResNet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])

""" ResNet-152 """
def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])
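Before wiring this backbone into a training script, a quick shape check helps (this snippet is my own illustration, assuming the listing above is saved as resnet.py): ResNet18() returns only the convolutional trunk, so a 32x32 input comes out as a [batch, 4, 4, 512] feature map that still needs global pooling and a Dense classifier.

import tensorflow as tf
from resnet import ResNet18  # assumes the listing above is saved as resnet.py

model = ResNet18()
model.build(input_shape=(None, 32, 32, 3))

x = tf.random.normal([4, 32, 32, 3])      # a dummy batch of 4 CIFAR-sized images
features = model(x, training=False)       # trunk output; no pooling or classifier inside
print(features.shape)                     # expected: (4, 4, 4, 512)

logits = tf.keras.layers.Dense(100)(tf.keras.layers.GlobalAveragePooling2D()(features))
print(logits.shape)                       # expected: (4, 100)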
- The main training program, my_Softmax_resnet18.py:
import tensorflow as tf
from tensorflow.keras import layers, optimizers, datasets, Sequential, regularizers
from resnet2 import ResNet18
from utils import compute_mean_var, lr_schedule_200ep
import numpy as np
import random
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.random.set_seed(2345)
batchsz = 128

# 1. Normalization; CIFAR-100 per-channel mean and std, computed from the data myself.
img_mean = tf.constant([0.50736203482434500, 0.4866895632914611, 0.4410885713465068])
img_std = tf.constant([0.26748815488001604, 0.2565930997269337, 0.2763085095510783])
def normalize(x, mean=img_mean, std=img_std):
    # x shape: [32, 32, 3]; mean/std have shape [3], so broadcasting aligns from the right:
    # mean: [3] -> [1, 1, 3] (insert leading 1s) -> [32, 32, 3] (tile to match x)
    x = (x - mean) / std
    return x

# 2. Per-sample preprocessing: pad + random crop + random flip, then scale and standardize.
def preprocess(x, y):
    x = tf.pad(x, [[4, 4], [4, 4], [0, 0]])   # pad 4 zero pixels on each side -> [40, 40, 3]
    x = tf.image.random_crop(x, [32, 32, 3])
    x = tf.image.random_flip_left_right(x)
    # x: [0, 255] -> [0, 1], then standardize
    x = tf.cast(x, dtype=tf.float32) / 255.
    x = normalize(x)
    y = tf.cast(y, dtype=tf.int32)
    return x, y

# Load the dataset.
(x, y), (x_test, y_test) = datasets.cifar100.load_data()
y = tf.squeeze(y)            # or tf.squeeze(y, axis=1) to drop the singleton label dimension
y_test = tf.squeeze(y_test)
print(x.shape, y.shape, x_test.shape, y_test.shape)

################## compute the mean and std (run once) #########################
# x_all = np.concatenate([x, x_test], axis=0).astype(np.float)
# # print(x_all.shape)
# mean_train, std_train = compute_mean_var(x_all/255.)
# print('mean_train:', mean_train, 'std_train:', std_train)
########################################################

# Wrap the training images and labels into a Dataset object.
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(5000).map(preprocess).batch(batchsz)
# Wrap the test images and labels into a Dataset object.
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess).batch(batchsz)

# Take one batch to check the sample shapes and value range.
sample = next(iter(train_db))
print('sample:', sample[0].shape, sample[1].shape,
      tf.reduce_min(sample[0]),
      tf.reduce_max(sample[0]))  # after standardization the values are roughly in [-2, 2]

def main():
    # Input: [b, 32, 32, 3]
    model = ResNet18()
    # model = resnet1.ResNet([2, 2, 2], 10)
    model.build(input_shape=(None, 32, 32, 3))
    model.summary()

    mydense = layers.Dense(100, activation=None)
    fc_net = Sequential([mydense])
    fc_net.build(input_shape=(None, 512))
    fc_net.summary()

    lr = 0.1
    optimizer = optimizers.SGD(lr=lr, momentum=0.9, decay=5e-4)
    variables = model.trainable_variables + fc_net.trainable_variables

    for epoch in range(500):
        for step, (x, y) in enumerate(train_db):
            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 100]
                out = model(x, training=True)
                avgpool = layers.GlobalAveragePooling2D()(out)
                logits = fc_net(avgpool)
                y_onehot = tf.one_hot(y, depth=100)
                # Multi-class cross-entropy, averaged over the batch.
                loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True))
                # Add an L2 penalty over all trainable weights.
                loss_regularization = []
                for p in variables:
                    loss_regularization.append(tf.nn.l2_loss(p))
                loss_regularization = tf.reduce_sum(tf.stack(loss_regularization))
                loss = loss + 5e-4 * loss_regularization

            # Compute the gradients.
            grads = tape.gradient(loss, variables)
            # Apply the update.
            optimizer.apply_gradients(zip(grads, variables))
            # Adjust the learning rate according to the schedule.
            lr = lr_schedule_200ep(epoch)
            optimizer.lr = lr  # apply the scheduled rate to the optimizer, not just to the printout
            # Print every 100 steps.
            if step % 100 == 0:
                print('epoch:', epoch, 'step:', step, 'loss:', float(loss), 'lr:', lr)

        # Evaluate on the test set.
        total_num = 0
        total_correct = 0
        for x, y in test_db:
            out = model(x, training=False)
            avgpool = layers.GlobalAveragePooling2D()(out)
            output = fc_net(avgpool)
            # Predicted class probabilities.
            prob = tf.nn.softmax(output, axis=1)
            pred = tf.argmax(prob, axis=1)  # argmax returns int64, so cast before comparing
            pred = tf.cast(pred, dtype=tf.int32)
            # Compare predictions against the ground truth.
            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)
            total_num += x.shape[0]
            total_correct += int(correct)  # convert to a Python int
        acc = total_correct / total_num
        print('epoch:', epoch, 'test_acc:', acc)

if __name__ == '__main__':
    main()
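A side note on the loss above: since the labels are integers, the one_hot plus categorical_crossentropy combination is equivalent to sparse_categorical_crossentropy on the raw integer labels. A tiny standalone check (my own sketch, not part of the original script):

import tensorflow as tf

logits = tf.random.normal([4, 100])                  # fake logits for a batch of 4
y = tf.constant([3, 17, 99, 0], dtype=tf.int32)      # integer class labels

loss_a = tf.reduce_mean(tf.losses.categorical_crossentropy(tf.one_hot(y, depth=100), logits, from_logits=True))
loss_b = tf.reduce_mean(tf.losses.sparse_categorical_crossentropy(y, logits, from_logits=True))
print(float(loss_a), float(loss_b))                  # the two values should match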
1.2. Method 1: TensorFlow 2.0 results
ssh://zhangkf@192.168.136.64:22/home/zhangkf/anaconda3/envs/tf2/bin/python -u /home/zhangkf/johnCodes/TF2/TF2_2/my_Softmax_resnet18.py
(50000, 32, 32, 3) (50000,) (10000, 32, 32, 3) (10000,)
sample: (128, 32, 32, 3) (128,) tf.Tensor(-1.8967644, shape=(), dtype=float32) tf.Tensor(2.0227804, shape=(), dtype=float32)
Model: "res_net"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
sequential (Sequential) multiple 1984
_________________________________________________________________
sequential_1 (Sequential) multiple 148480
_________________________________________________________________
sequential_2 (Sequential) multiple 526848
_________________________________________________________________
sequential_4 (Sequential) multiple 2102272
_________________________________________________________________
sequential_6 (Sequential) multiple 8398848
=================================================================
Total params: 11,178,432
Trainable params: 11,168,832
Non-trainable params: 9,600
_________________________________________________________________
Model: "sequential_8"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) multiple 51300
=================================================================
Total params: 51,300
Trainable params: 51,300
Non-trainable params: 0
_________________________________________________________________
epoch: 0 step: 0 loss: 7.394731521606445 lr: 0.1
epoch: 0 step: 100 loss: 6.207117080688477 lr: 0.1
epoch: 0 step: 200 loss: 5.824721336364746 lr: 0.1
epoch: 0 step: 300 loss: 5.4654998779296875 lr: 0.1
epoch: 0 test_acc: 0.1189
......
......
epoch: 183 test_acc: 0.7506
epoch: 184 step: 0 loss: 0.20965977013111115 lr: 0.0008
epoch: 184 step: 100 loss: 0.20780201256275177 lr: 0.0008
epoch: 184 step: 200 loss: 0.21273905038833618 lr: 0.0008
epoch: 184 step: 300 loss: 0.20833362638950348 lr: 0.0008
epoch: 184 test_acc: 0.7482
epoch: 185 step: 0 loss: 0.2106461524963379 lr: 0.0008
epoch: 185 step: 100 loss: 0.20884671807289124 lr: 0.0008
epoch: 185 step: 200 loss: 0.2080637365579605 lr: 0.0008
epoch: 185 step: 300 loss: 0.2059493064880371 lr: 0.0008
epoch: 185 test_acc: 0.7485
epoch: 186 step: 0 loss: 0.21030229330062866 lr: 0.0008
epoch: 186 step: 100 loss: 0.21696045994758606 lr: 0.0008
epoch: 186 step: 200 loss: 0.20754486322402954 lr: 0.0008
epoch: 186 step: 300 loss: 0.20610006153583527 lr: 0.0008
epoch: 186 test_acc: 0.7478
epoch: 187 step: 0 loss: 0.20540566742420197 lr: 0.0008
epoch: 187 step: 100 loss: 0.20688945055007935 lr: 0.0008
epoch: 187 step: 200 loss: 0.20678606629371643 lr: 0.0008
epoch: 187 step: 300 loss: 0.20605340600013733 lr: 0.0008
epoch: 187 test_acc: 0.7517
epoch: 188 step: 0 loss: 0.2030630260705948 lr: 0.0008
epoch: 188 step: 100 loss: 0.2111106812953949 lr: 0.0008
epoch: 188 step: 200 loss: 0.20715123414993286 lr: 0.0008
epoch: 188 step: 300 loss: 0.20635318756103516 lr: 0.0008
epoch: 188 test_acc: 0.7496
epoch: 189 step: 0 loss: 0.20348598062992096 lr: 0.0008
epoch: 189 step: 100 loss: 0.20404331386089325 lr: 0.0008
epoch: 189 step: 200 loss: 0.20470349490642548 lr: 0.0008
epoch: 189 step: 300 loss: 0.2049822062253952 lr: 0.0008
epoch: 189 test_acc: 0.7501
epoch: 190 step: 0 loss: 0.2041555643081665 lr: 0.0008
epoch: 190 step: 100 loss: 0.2052450180053711 lr: 0.0008
epoch: 190 step: 200 loss: 0.20579159259796143 lr: 0.0008
epoch: 190 step: 300 loss: 0.2014419138431549 lr: 0.0008
epoch: 190 test_acc: 0.7465
Process finished with exit code 0
1.3. Method 2: TensorFlow 2.0 code
- resnet2.py: the same as the resnet.py listing in section 1.1, so the code is not repeated here.
- The main training program, resnet_my_soft.py:
import tensorflow as tf
from tensorflow.keras import layers, optimizers, datasets, Sequential, regularizers
from resnet2 import ResNet18
from utils import compute_mean_var, norm_images, lr_schedule_200ep
import numpy as np
import random
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.random.set_seed(2345)
batchsz = 128

# Per-sample preprocessing: only augmentation here, since the arrays are standardized up front.
def preprocess(x, y):
    x = tf.pad(x, [[4, 4], [4, 4], [0, 0]])   # pad 4 zeros on each side of axes 0 and 1 -> [40, 40, 3]
    x = tf.image.random_crop(x, [32, 32, 3])  # random crop, not a center crop
    x = tf.image.random_flip_left_right(x)
    y = tf.cast(y, dtype=tf.int32)
    return x, y

# Load the dataset.
(x, y), (x_test, y_test) = datasets.cifar100.load_data()
y = tf.squeeze(y)            # or tf.squeeze(y, axis=1) to drop the singleton label dimension
y_test = tf.squeeze(y_test)
print(x.shape, y.shape, x_test.shape, y_test.shape)

# Standardize the arrays with their own mean and std.
mean_train, std_train = compute_mean_var(x/255.)        # reference only; norm_images recomputes internally
mean_test, std_test = compute_mean_var(x_test/255.)
x = norm_images(x)
x_test = norm_images(x_test)

# Wrap the training images and labels into a Dataset object.
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(5000).map(preprocess).batch(batchsz)
# Wrap the test images and labels into a Dataset object.
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess).batch(batchsz)

# Take one batch to check the sample shapes and value range.
sample = next(iter(train_db))
print('sample:', sample[0].shape, sample[1].shape,
      tf.reduce_min(sample[0]),
      tf.reduce_max(sample[0]))  # after standardization the values are roughly in [-2, 2]

def main():
    # Input: [b, 32, 32, 3]
    model = ResNet18()
    # model = resnet1.ResNet([2, 2, 2], 10)
    model.build(input_shape=(None, 32, 32, 3))
    model.summary()

    mydense = layers.Dense(100, activation=None)
    fc_net = Sequential([mydense])
    fc_net.build(input_shape=(None, 512))
    fc_net.summary()

    lr = 0.1
    optimizer = optimizers.SGD(lr=lr, momentum=0.9, decay=5e-4)
    variables = model.trainable_variables + fc_net.trainable_variables

    for epoch in range(500):
        for step, (x, y) in enumerate(train_db):
            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 100]
                out = model(x, training=True)
                avgpool = layers.GlobalAveragePooling2D()(out)
                logits = fc_net(avgpool)
                y_onehot = tf.one_hot(y, depth=100)
                # Multi-class cross-entropy, averaged over the batch.
                loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True))
                # Add an L2 penalty over all trainable weights.
                loss_regularization = []
                for p in variables:
                    loss_regularization.append(tf.nn.l2_loss(p))
                loss_regularization = tf.reduce_sum(tf.stack(loss_regularization))
                loss = loss + 5e-4 * loss_regularization

            # Compute the gradients.
            grads = tape.gradient(loss, variables)
            # Apply the update.
            optimizer.apply_gradients(zip(grads, variables))
            # Adjust the learning rate according to the schedule.
            lr = lr_schedule_200ep(epoch)
            optimizer.lr = lr  # apply the scheduled rate to the optimizer, not just to the printout
            # Print every 100 steps.
            if step % 100 == 0:
                print('epoch:', epoch, 'step:', step, 'loss:', float(loss), 'lr:', lr)

        # Evaluate on the test set.
        total_num = 0
        total_correct = 0
        for x, y in test_db:
            out = model(x, training=False)
            avgpool = layers.GlobalAveragePooling2D()(out)
            output = fc_net(avgpool)
            # Predicted class probabilities.
            prob = tf.nn.softmax(output, axis=1)
            pred = tf.argmax(prob, axis=1)  # argmax returns int64, so cast before comparing
            pred = tf.cast(pred, dtype=tf.int32)
            # Compare predictions against the ground truth.
            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)
            total_num += x.shape[0]
            total_correct += int(correct)  # convert to a Python int
        acc = total_correct / total_num
        print('epoch:', epoch, 'test_acc:', acc)

if __name__ == '__main__':
    main()
- utils.py: the same as the listing in section 1.0, so the code is not repeated here.
1.4. Method 2: TensorFlow 2.0 results
ssh://zhangkf@192.168.136.64:22/home/zhangkf/anaconda3/envs/tf2/bin/python -u /home/zhangkf/johnCodes/TF2/TF2_2/resnet_my_soft.py
(50000, 32, 32, 3) (50000,) (10000, 32, 32, 3) (10000,)
sample: (128, 32, 32, 3) (128,) tf.Tensor(-1.8967644, shape=(), dtype=float32) tf.Tensor(2.0227804, shape=(), dtype=float32)
Model: "res_net"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
sequential (Sequential) multiple 1984
_________________________________________________________________
sequential_1 (Sequential) multiple 148480
_________________________________________________________________
sequential_2 (Sequential) multiple 526848
_________________________________________________________________
sequential_4 (Sequential) multiple 2102272
_________________________________________________________________
sequential_6 (Sequential) multiple 8398848
=================================================================
Total params: 11,178,432
Trainable params: 11,168,832
Non-trainable params: 9,600
_________________________________________________________________
Model: "sequential_8"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) multiple 51300
=================================================================
Total params: 51,300
Trainable params: 51,300
Non-trainable params: 0
_________________________________________________________________
epoch: 0 step: 0 loss: 7.394731521606445 lr: 0.1
epoch: 0 step: 100 loss: 6.207117080688477 lr: 0.1
epoch: 0 step: 200 loss: 5.824721336364746 lr: 0.1
epoch: 0 step: 300 loss: 5.4654998779296875 lr: 0.1
epoch: 0 test_acc: 0.1289
......
......
epoch: 489 step: 0 loss: 0.1494695544242859 lr: 6e-05
epoch: 489 step: 100 loss: 0.14808781445026398 lr: 6e-05
epoch: 489 step: 200 loss: 0.14702562987804413 lr: 6e-05
epoch: 489 step: 300 loss: 0.1462768167257309 lr: 6e-05
epoch: 489 test_acc: 0.7493
epoch: 490 step: 0 loss: 0.14813287556171417 lr: 6e-05
epoch: 490 step: 100 loss: 0.14719073474407196 lr: 6e-05
epoch: 490 step: 200 loss: 0.14599332213401794 lr: 6e-05
epoch: 490 step: 300 loss: 0.1488029956817627 lr: 6e-05
epoch: 490 test_acc: 0.7495
epoch: 491 step: 0 loss: 0.145082488656044 lr: 6e-05
epoch: 491 step: 100 loss: 0.146611288189888 lr: 6e-05
epoch: 491 step: 200 loss: 0.14689156413078308 lr: 6e-05
epoch: 491 step: 300 loss: 0.14749108254909515 lr: 6e-05
epoch: 491 test_acc: 0.7497
epoch: 492 step: 0 loss: 0.14979220926761627 lr: 6e-05
epoch: 492 step: 100 loss: 0.14942795038223267 lr: 6e-05
epoch: 492 step: 200 loss: 0.14613668620586395 lr: 6e-05
epoch: 492 step: 300 loss: 0.14891566336154938 lr: 6e-05
epoch: 492 test_acc: 0.749
epoch: 493 step: 0 loss: 0.1457490772008896 lr: 6e-05
epoch: 493 step: 100 loss: 0.15404482185840607 lr: 6e-05
epoch: 493 step: 200 loss: 0.14911715686321259 lr: 6e-05
epoch: 493 step: 300 loss: 0.15132851898670197 lr: 6e-05
epoch: 493 test_acc: 0.7516
epoch: 494 step: 0 loss: 0.145752415060997 lr: 6e-05
epoch: 494 step: 100 loss: 0.145289346575737 lr: 6e-05
epoch: 494 step: 200 loss: 0.14817515015602112 lr: 6e-05
epoch: 494 step: 300 loss: 0.1482996940612793 lr: 6e-05
epoch: 494 test_acc: 0.7511
epoch: 495 step: 0 loss: 0.14684642851352692 lr: 6e-05
epoch: 495 step: 100 loss: 0.145888090133667 lr: 6e-05
epoch: 495 step: 200 loss: 0.1546863317489624 lr: 6e-05
epoch: 495 step: 300 loss: 0.14529471099376678 lr: 6e-05
epoch: 495 test_acc: 0.7492
epoch: 496 step: 0 loss: 0.1461883932352066 lr: 6e-05
epoch: 496 step: 100 loss: 0.14481580257415771 lr: 6e-05
epoch: 496 step: 200 loss: 0.14598236978054047 lr: 6e-05
epoch: 496 step: 300 loss: 0.14752474427223206 lr: 6e-05
epoch: 496 test_acc: 0.7468
epoch: 497 step: 0 loss: 0.1454370766878128 lr: 6e-05
epoch: 497 step: 100 loss: 0.14692384004592896 lr: 6e-05
epoch: 497 step: 200 loss: 0.14532673358917236 lr: 6e-05
epoch: 497 step: 300 loss: 0.14575102925300598 lr: 6e-05
epoch: 497 test_acc: 0.7504
epoch: 498 step: 0 loss: 0.1448342204093933 lr: 6e-05
epoch: 498 step: 100 loss: 0.14427633583545685 lr: 6e-05
epoch: 498 step: 200 loss: 0.14443443715572357 lr: 6e-05
epoch: 498 step: 300 loss: 0.14560484886169434 lr: 6e-05
epoch: 498 test_acc: 0.748
epoch: 499 step: 0 loss: 0.14439353346824646 lr: 6e-05
epoch: 499 step: 100 loss: 0.14624375104904175 lr: 6e-05
epoch: 499 step: 200 loss: 0.14497709274291992 lr: 6e-05
epoch: 499 step: 300 loss: 0.14951634407043457 lr: 6e-05
epoch: 499 test_acc: 0.7493
Process finished with exit code 0
II. ResNet-18 for CIFAR-10 image classification!
2.1. Method 1: TensorFlow 2.0 code
- You only need to switch the Method 1 program from Part I to the cifar10 dataset; a minimal sketch of the required changes is given below.
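A small sketch of those changes (my own illustration, assuming utils.py from section 1.0 is on the path); it recomputes the normalization statistics for CIFAR-10, since the constants in my_Softmax_resnet18.py are the CIFAR-100 values:

import tensorflow as tf
from tensorflow.keras import datasets
from utils import compute_mean_var

# Load CIFAR-10 instead of CIFAR-100 and recompute the normalization statistics.
(x, y), (x_test, y_test) = datasets.cifar10.load_data()
mean, std = compute_mean_var(x / 255.)
print('img_mean =', mean)   # plug these into img_mean / img_std of my_Softmax_resnet18.py
print('img_std  =', std)

# The classifier head then needs 10 outputs and depth=10 one-hot labels:
# mydense = layers.Dense(10, activation=None)
# y_onehot = tf.one_hot(y, depth=10)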
2.2. Method 1: TensorFlow 2.0 results
ssh://zhangkf@192.168.136.64:22/home/zhangkf/anaconda3/envs/tf2/bin/python -u /home/zhangkf/johnCodes/TF2/TF2_2/my_Softmax_resnet18.py
(50000, 32, 32, 3) (50000,) (10000, 32, 32, 3) (10000,)
sample: (128, 32, 32, 3) (128,) tf.Tensor(-1.8967644, shape=(), dtype=float32) tf.Tensor(2.0227804, shape=(), dtype=float32)
Model: "res_net"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
sequential (Sequential) multiple 1984
_________________________________________________________________
sequential_1 (Sequential) multiple 148480
_________________________________________________________________
sequential_2 (Sequential) multiple 526848
_________________________________________________________________
sequential_4 (Sequential) multiple 2102272
_________________________________________________________________
sequential_6 (Sequential) multiple 8398848
=================================================================
Total params: 11,178,432
Trainable params: 11,168,832
Non-trainable params: 9,600
_________________________________________________________________
Model: "sequential_8"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) multiple 51300
=================================================================
Total params: 51,300
Trainable params: 51,300
Non-trainable params: 0
_________________________________________________________________
epoch: 0 step: 0 loss: 7.394731521606445 lr: 0.1
epoch: 0 step: 100 loss: 6.207117080688477 lr: 0.1
epoch: 0 step: 200 loss: 5.824721336364746 lr: 0.1
epoch: 0 step: 300 loss: 5.4654998779296875 lr: 0.1
epoch: 0 test_acc: 0.3489
......
......
epoch: 260 step: 0 loss: 0.09481894969940186 lr: 0.0001
epoch: 260 step: 100 loss: 0.08265358209609985 lr: 0.0001
epoch: 260 step: 200 loss: 0.09868084639310837 lr: 0.0001
epoch: 260 step: 300 loss: 0.08602790534496307 lr: 0.0001
epoch: 260 test_acc: 0.9142
epoch: 261 step: 0 loss: 0.07496778666973114 lr: 0.0001
epoch: 261 step: 100 loss: 0.07761096954345703 lr: 0.0001
epoch: 261 step: 200 loss: 0.07461725920438766 lr: 0.0001
epoch: 261 step: 300 loss: 0.08735549449920654 lr: 0.0001
epoch: 261 test_acc: 0.9107
epoch: 262 step: 0 loss: 0.09080111980438232 lr: 0.0001
epoch: 262 step: 100 loss: 0.07853064686059952 lr: 0.0001
epoch: 262 step: 200 loss: 0.08598905801773071 lr: 0.0001
epoch: 262 step: 300 loss: 0.10019595921039581 lr: 0.0001
epoch: 262 test_acc: 0.9199
epoch: 263 step: 0 loss: 0.09563722461462021 lr: 0.0001
epoch: 263 step: 100 loss: 0.07512261718511581 lr: 0.0001
epoch: 263 step: 200 loss: 0.07557415217161179 lr: 0.0001
epoch: 263 step: 300 loss: 0.07755643874406815 lr: 0.0001
epoch: 263 test_acc: 0.922
epoch: 264 step: 0 loss: 0.08814540505409241 lr: 0.0001
epoch: 264 step: 100 loss: 0.09052534401416779 lr: 0.0001
epoch: 264 step: 200 loss: 0.08426695317029953 lr: 0.0001
epoch: 264 step: 300 loss: 0.09187693148851395 lr: 0.0001
epoch: 264 test_acc: 0.9169
epoch: 265 step: 0 loss: 0.07832321524620056 lr: 0.0001
epoch: 265 step: 100 loss: 0.08257047086954117 lr: 0.0001
epoch: 265 step: 200 loss: 0.08808299899101257 lr: 0.0001
epoch: 265 step: 300 loss: 0.0800381526350975 lr: 0.0001
epoch: 265 test_acc: 0.912
epoch: 266 step: 0 loss: 0.07372695952653885 lr: 0.0001
epoch: 266 step: 100 loss: 0.07708251476287842 lr: 0.0001
epoch: 266 step: 200 loss: 0.07618334144353867 lr: 0.0001
epoch: 266 step: 300 loss: 0.07410835474729538 lr: 0.0001
epoch: 266 test_acc: 0.9198
epoch: 267 step: 0 loss: 0.07419181615114212 lr: 0.0001
epoch: 267 step: 100 loss: 0.07721352577209473 lr: 0.0001
epoch: 267 step: 200 loss: 0.07496969401836395 lr: 0.0001
epoch: 267 step: 300 loss: 0.0783621296286583 lr: 0.0001
epoch: 267 test_acc: 0.9235
epoch: 268 step: 0 loss: 0.09658615291118622 lr: 0.0001
epoch: 268 step: 100 loss: 0.07725954800844193 lr: 0.0001
epoch: 268 step: 200 loss: 0.07876145839691162 lr: 0.0001
epoch: 268 step: 300 loss: 0.07835458219051361 lr: 0.0001
epoch: 268 test_acc: 0.9228
epoch: 269 step: 0 loss: 0.07436257600784302 lr: 0.0001
epoch: 269 step: 100 loss: 0.07431856542825699 lr: 0.0001
epoch: 269 step: 200 loss: 0.09103652834892273 lr: 0.0001
epoch: 269 step: 300 loss: 0.11077985167503357 lr: 0.0001
epoch: 269 test_acc: 0.922
epoch: 270 step: 0 loss: 0.07761970162391663 lr: 0.0001
epoch: 270 step: 100 loss: 0.08043190836906433 lr: 0.0001
epoch: 270 step: 200 loss: 0.0762677863240242 lr: 0.0001
epoch: 270 step: 300 loss: 0.07388127595186234 lr: 0.0001
epoch: 270 test_acc: 0.9286
......
......
epoch: 280 step: 0 loss: 0.073137067258358 lr: 0.0001
epoch: 280 step: 100 loss: 0.07959213107824326 lr: 0.0001
epoch: 280 step: 200 loss: 0.07135903090238571 lr: 0.0001
epoch: 280 step: 300 loss: 0.07235198467969894 lr: 0.0001
epoch: 280 test_acc: 0.9235
epoch: 281 step: 0 loss: 0.07289643585681915 lr: 0.0001
epoch: 281 step: 100 loss: 0.07314246892929077 lr: 0.0001
epoch: 281 step: 200 loss: 0.07383954524993896 lr: 0.0001
epoch: 281 step: 300 loss: 0.07232016324996948 lr: 0.0001
epoch: 281 test_acc: 0.9302
epoch: 282 step: 0 loss: 0.07303141057491302 lr: 0.0001
epoch: 282 step: 100 loss: 0.07138438522815704 lr: 0.0001
epoch: 282 step: 200 loss: 0.08452145755290985 lr: 0.0001
epoch: 282 step: 300 loss: 0.07160738855600357 lr: 0.0001
epoch: 282 test_acc: 0.9314
epoch: 283 step: 0 loss: 0.07566734403371811 lr: 0.0001
epoch: 283 step: 100 loss: 0.07201775908470154 lr: 0.0001
epoch: 283 step: 200 loss: 0.0718809962272644 lr: 0.0001
epoch: 283 step: 300 loss: 0.07106605917215347 lr: 0.0001
epoch: 283 test_acc: 0.9292
epoch: 284 step: 0 loss: 0.07280586659908295 lr: 0.0001
epoch: 284 step: 100 loss: 0.07130647450685501 lr: 0.0001
epoch: 284 step: 200 loss: 0.07442809641361237 lr: 0.0001
epoch: 284 step: 300 loss: 0.07640382647514343 lr: 0.0001
epoch: 284 test_acc: 0.9319
epoch: 285 step: 0 loss: 0.07215876877307892 lr: 0.0001
epoch: 285 step: 100 loss: 0.07119663804769516 lr: 0.0001
epoch: 285 step: 200 loss: 0.07445303350687027 lr: 0.0001
epoch: 285 step: 300 loss: 0.07052809000015259 lr: 0.0001
epoch: 285 test_acc: 0.9336
epoch: 286 step: 0 loss: 0.07295365631580353 lr: 0.0001
epoch: 286 step: 100 loss: 0.07098635286092758 lr: 0.0001
epoch: 286 step: 200 loss: 0.07169212400913239 lr: 0.0001
epoch: 286 step: 300 loss: 0.07126642018556595 lr: 0.0001
epoch: 286 test_acc: 0.9311
epoch: 287 step: 0 loss: 0.07630991190671921 lr: 0.0001
epoch: 287 step: 100 loss: 0.07050806283950806 lr: 0.0001
epoch: 287 step: 200 loss: 0.07012207061052322 lr: 0.0001
epoch: 287 step: 300 loss: 0.0746440514922142 lr: 0.0001
epoch: 287 test_acc: 0.9274
epoch: 288 step: 0 loss: 0.07019937038421631 lr: 0.0001
epoch: 288 step: 100 loss: 0.0716957300901413 lr: 0.0001
epoch: 288 step: 200 loss: 0.07188993692398071 lr: 0.0001
epoch: 288 step: 300 loss: 0.07031527161598206 lr: 0.0001
epoch: 288 test_acc: 0.9329
epoch: 289 step: 0 loss: 0.07002424448728561 lr: 0.0001
epoch: 289 step: 100 loss: 0.07001134753227234 lr: 0.0001
epoch: 289 step: 200 loss: 0.07058549672365189 lr: 0.0001
epoch: 289 step: 300 loss: 0.07042653113603592 lr: 0.0001
epoch: 289 test_acc: 0.9363
epoch: 290 step: 0 loss: 0.06984829902648926 lr: 0.0001
epoch: 290 step: 100 loss: 0.07010969519615173 lr: 0.0001
epoch: 290 step: 200 loss: 0.06986066699028015 lr: 0.0001
epoch: 290 step: 300 loss: 0.07057078182697296 lr: 0.0001
epoch: 290 test_acc: 0.9357
epoch: 291 step: 0 loss: 0.06959455460309982 lr: 0.0001
epoch: 291 step: 100 loss: 0.07020555436611176 lr: 0.0001
epoch: 291 step: 200 loss: 0.07174291461706161 lr: 0.0001
epoch: 291 step: 300 loss: 0.06960965692996979 lr: 0.0001
epoch: 291 test_acc: 0.9328
epoch: 292 step: 0 loss: 0.07022673636674881 lr: 0.0001
epoch: 292 step: 100 loss: 0.06943327933549881 lr: 0.0001
epoch: 292 step: 200 loss: 0.07133160531520844 lr: 0.0001
epoch: 292 step: 300 loss: 0.0689350962638855 lr: 0.0001
epoch: 292 test_acc: 0.9356
epoch: 293 step: 0 loss: 0.06888937950134277 lr: 0.0001
epoch: 293 step: 100 loss: 0.07159393280744553 lr: 0.0001
epoch: 293 step: 200 loss: 0.06914760172367096 lr: 0.0001
epoch: 293 step: 300 loss: 0.071612648665905 lr: 0.0001
epoch: 293 test_acc: 0.9364
epoch: 294 step: 0 loss: 0.06883252412080765 lr: 0.0001
epoch: 294 step: 100 loss: 0.07030674815177917 lr: 0.0001
epoch: 294 step: 200 loss: 0.06982254236936569 lr: 0.0001
epoch: 294 step: 300 loss: 0.06911145895719528 lr: 0.0001
epoch: 294 test_acc: 0.9384
epoch: 295 step: 0 loss: 0.06850303709506989 lr: 0.0001
epoch: 295 step: 100 loss: 0.06918036192655563 lr: 0.0001
epoch: 295 step: 200 loss: 0.06856174767017365 lr: 0.0001
epoch: 295 step: 300 loss: 0.07089497894048691 lr: 0.0001
epoch: 295 test_acc: 0.9371
epoch: 296 step: 0 loss: 0.067996084690094 lr: 0.0001
epoch: 296 step: 100 loss: 0.06892222911119461 lr: 0.0001
epoch: 296 step: 200 loss: 0.06835798919200897 lr: 0.0001
epoch: 296 step: 300 loss: 0.06783054769039154 lr: 0.0001
epoch: 296 test_acc: 0.9344
epoch: 297 step: 0 loss: 0.06848108023405075 lr: 0.0001
epoch: 297 step: 100 loss: 0.06768421083688736 lr: 0.0001
epoch: 297 step: 200 loss: 0.06921231746673584 lr: 0.0001
epoch: 297 step: 300 loss: 0.06945653259754181 lr: 0.0001
epoch: 297 test_acc: 0.9355
epoch: 298 step: 0 loss: 0.06914812326431274 lr: 0.0001
epoch: 298 step: 100 loss: 0.06926056742668152 lr: 0.0001
epoch: 298 step: 200 loss: 0.06757640093564987 lr: 0.0001
epoch: 298 step: 300 loss: 0.0675162822008133 lr: 0.0001
epoch: 298 test_acc: 0.9323
epoch: 299 step: 0 loss: 0.07044597715139389 lr: 0.0001
epoch: 299 step: 100 loss: 0.06797239929437637 lr: 0.0001
epoch: 299 step: 200 loss: 0.06796549260616302 lr: 0.0001
epoch: 299 step: 300 loss: 0.0672990009188652 lr: 0.0001
epoch: 299 test_acc: 0.9355
epoch: 300 step: 0 loss: 0.06734433025121689 lr: 6e-05
epoch: 300 step: 100 loss: 0.07003568857908249 lr: 6e-05
epoch: 300 step: 200 loss: 0.06674795597791672 lr: 6e-05
epoch: 300 step: 300 loss: 0.06762071698904037 lr: 6e-05
epoch: 300 test_acc: 0.9356
Process finished with exit code 0
2.3. Method 2: TensorFlow 2.0 code
See Part I.
2.4. Method 2: TensorFlow 2.0 results
See Part I.
III. ResNet-18 for CIFAR-100 image classification (with attention)!
This part adds an attention mechanism on top of Part I, which improves the model's accuracy by roughly 0.6 percentage points. For the details of the attention mechanism, see my article on CBAM: Convolutional Block Attention Module, reimplemented in TensorFlow 2.0.
3.1. resnet.py
import tensorflow as tf
from tensorflow.keras import layers, Sequential, regularizers
import tensorflow.keras as keras

""" Version 2, 2020-08-19, @zhangkaifang: closer to the original ResNet-18 """

# Helper that builds a padded convolution; kernel_initializer can be 'he_normal' or 'glorot_normal'.
def regularized_padded_conv(*args, **kwargs):
    return layers.Conv2D(*args, **kwargs, padding='same', use_bias=False,
                         kernel_initializer='he_normal',
                         kernel_regularizer=regularizers.l2(5e-4))

############################### Channel attention ###############################
class ChannelAttention(layers.Layer):
    def __init__(self, in_planes, ratio=32):
        super(ChannelAttention, self).__init__()
        self.avg = layers.GlobalAveragePooling2D()
        self.max = layers.GlobalMaxPooling2D()
        self.conv1 = layers.Conv2D(in_planes // ratio, kernel_size=1, strides=1, padding='same',
                                   kernel_regularizer=regularizers.l2(5e-4),
                                   use_bias=True, activation=tf.nn.relu)
        self.conv2 = layers.Conv2D(in_planes, kernel_size=1, strides=1, padding='same',
                                   kernel_regularizer=regularizers.l2(5e-4),
                                   use_bias=True)

    def call(self, inputs):
        avg = self.avg(inputs)
        max = self.max(inputs)
        avg = layers.Reshape((1, 1, avg.shape[1]))(avg)   # shape (None, 1, 1, feature)
        max = layers.Reshape((1, 1, max.shape[1]))(max)   # shape (None, 1, 1, feature)
        avg_out = self.conv2(self.conv1(avg))
        max_out = self.conv2(self.conv1(max))
        out = avg_out + max_out
        out = tf.nn.sigmoid(out)
        return out

############################### Spatial attention ###############################
class SpatialAttention(layers.Layer):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        self.conv1 = regularized_padded_conv(1, kernel_size=kernel_size, strides=1, activation=tf.nn.sigmoid)

    def call(self, inputs):
        avg_out = tf.reduce_mean(inputs, axis=3)
        max_out = tf.reduce_max(inputs, axis=3)
        out = tf.stack([avg_out, max_out], axis=3)  # stack along a new channel axis (like expand_dims + concat)
        out = self.conv1(out)
        return out

# 1. Basic Block, used by ResNet-18 and ResNet-34.
class BasicBlock(layers.Layer):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        # 1. First of the two convolutions in the BasicBlock; it may downsample via `stride`.
        self.conv1 = regularized_padded_conv(out_channels, kernel_size=3, strides=stride)
        self.bn1 = layers.BatchNormalization()
        # 2. Second convolution; only the first one downsamples, so the stride is fixed to 1 here.
        self.conv2 = regularized_padded_conv(out_channels, kernel_size=3, strides=1)
        self.bn2 = layers.BatchNormalization()
        ############################### attention ###############################
        self.ca = ChannelAttention(out_channels)
        self.sa = SpatialAttention()
        # 3. If the block downsamples (stride != 1) or changes the channel count, project the
        #    shortcut with a 1x1 convolution; otherwise use the identity.
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = Sequential([regularized_padded_conv(self.expansion * out_channels,
                                                                kernel_size=1, strides=stride),
                                        layers.BatchNormalization()])
        else:
            self.shortcut = lambda x, _: x

    def call(self, inputs, training=False):
        out = self.conv1(inputs)
        out = self.bn1(out, training=training)
        out = tf.nn.relu(out)
        out = self.conv2(out)
        out = self.bn2(out, training=training)
        ############################### attention ###############################
        out = self.ca(out) * out
        out = self.sa(out) * out

        out = out + self.shortcut(inputs, training)
        out = tf.nn.relu(out)
        return out

##############################################################
# 2. Bottleneck block, used by ResNet-50, ResNet-101 and ResNet-152.
class Bottleneck(keras.Model):
    expansion = 4

    def __init__(self, in_channels, out_channels, strides=1):
        super(Bottleneck, self).__init__()
        self.conv1 = regularized_padded_conv(out_channels, 1, 1)
        self.bn1 = layers.BatchNormalization()
        self.conv2 = regularized_padded_conv(out_channels, 3, strides)
        self.bn2 = layers.BatchNormalization()
        self.conv3 = regularized_padded_conv(out_channels * self.expansion, 1, 1)
        self.bn3 = layers.BatchNormalization()
        ############################### attention ###############################
        self.ca = ChannelAttention(self.expansion * out_channels)
        self.sa = SpatialAttention()

        if strides != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = Sequential([regularized_padded_conv(self.expansion * out_channels, kernel_size=1,
                                                                strides=strides),
                                        layers.BatchNormalization()])
        else:
            self.shortcut = lambda x, _: x

    def call(self, x, training=False):
        out = tf.nn.relu(self.bn1(self.conv1(x), training))
        out = tf.nn.relu(self.bn2(self.conv2(out), training))
        out = self.bn3(self.conv3(out), training)
        ############################### attention ###############################
        out = self.ca(out) * out
        out = self.sa(out) * out

        out = out + self.shortcut(x, training)
        out = tf.nn.relu(out)
        return out

##############################################################
# 3. The ResNet itself; inheriting from keras.Model (or keras.layers.Layer) both work.
class ResNet(keras.Model):
    # layer_dims, e.g. [2, 2, 2, 2]: 4 residual stages with 2 BasicBlocks each.
    # num_classes: size of the final fully connected output, i.e. the number of classes.
    def __init__(self, blocks, layer_dims, num_classes=100):
        super(ResNet, self).__init__()
        self.in_channels = 64

        # 0. Stem convolution; a MaxPool2D could optionally be added here, but it is omitted.
        #    Its channel count must match layer1, otherwise the residual add would fail.
        self.stem = Sequential([regularized_padded_conv(64, kernel_size=3, strides=1),
                                layers.BatchNormalization()])

        # 1. Build the 4 residual stages; the channel widths (64/128/256/512) are empirical values.
        self.layer1 = self.build_resblock(blocks, 64, layer_dims[0], stride=1)
        self.layer2 = self.build_resblock(blocks, 128, layer_dims[1], stride=2)
        self.layer3 = self.build_resblock(blocks, 256, layer_dims[2], stride=2)
        self.layer4 = self.build_resblock(blocks, 512, layer_dims[3], stride=2)
        # self.final_bn = layers.BatchNormalization()
        # self.avgpool = layers.GlobalAveragePooling2D()
        # self.fc = layers.Dense(num_classes)

    # 2. Build one residual stage: the first block may downsample, the rest use stride 1.
    def build_resblock(self, blocks, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)  # e.g. stride=2, num_blocks=2 -> [2, 1]
        res_blocks = Sequential()
        for stride in strides:
            res_blocks.add(blocks(self.in_channels, out_channels, stride))
            self.in_channels = out_channels
        return res_blocks

    def call(self, inputs, training=False):
        # The layers were created in __init__; here is the forward pass.
        out = self.stem(inputs, training)
        out = tf.nn.relu(out)

        out = self.layer1(out, training=training)
        out = self.layer2(out, training=training)
        out = self.layer3(out, training=training)
        out = self.layer4(out, training=training)
        # out = self.final_bn(out, training=training)
        # out = tf.nn.relu(out)
        # A global average pooling would reduce the feature map to [batchsize, channel],
        # so no reshape would be needed before the classifier:
        # out = self.avgpool(out)
        # # [b, 100]
        # out = self.fc(out)
        return out

##############################################################
""" ResNet-18 """
def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])

""" ResNet-34: only the stage configuration changes, still 4 residual stages. """
def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])

""" ResNet-50 """
def ResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])

""" ResNet-101 """
def ResNet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])

""" ResNet-152 """
def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])
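To see what the two attention modules do, the following standalone sketch (my own check, assuming the listing above is saved as resnet.py) feeds a random feature map through them: ChannelAttention produces a [b, 1, 1, C] gate and SpatialAttention a [b, H, W, 1] gate, each multiplied element-wise with the features, exactly as inside BasicBlock.

import tensorflow as tf
from resnet import ChannelAttention, SpatialAttention  # assumes the listing above is saved as resnet.py

feat = tf.random.normal([2, 8, 8, 64])    # a dummy [batch, H, W, C] feature map

ca = ChannelAttention(in_planes=64)       # channel gate in (0, 1), one value per channel
sa = SpatialAttention()                   # spatial gate in (0, 1), one value per location

print(ca(feat).shape)                     # expected: (2, 1, 1, 64)
print(sa(feat).shape)                     # expected: (2, 8, 8, 1)

refined = ca(feat) * feat                 # channel reweighting first, as in BasicBlock.call
refined = sa(refined) * refined           # then spatial reweighting
print(refined.shape)                      # (2, 8, 8, 64)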
3.2. res_my_soft.py
import tensorflow as tf
from tensorflow.keras import layers, optimizers, datasets, Sequential, regularizers
from resnet import ResNet18
import numpy as np
import random
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.random.set_seed(2345)
batchsz = 256

# 1. Normalization; CIFAR-100 per-channel mean and std, computed from the data myself.
img_mean = tf.constant([0.50736203482434500, 0.4866895632914611, 0.4410885713465068])
img_std = tf.constant([0.26748815488001604, 0.2565930997269337, 0.2763085095510783])
def normalize(x, mean=img_mean, std=img_std):
    x = (x - mean) / std
    return x

# 2. Per-sample preprocessing: pad + random crop + random flip, then scale and standardize.
def preprocess(x, y):
    x = tf.pad(x, [[4, 4], [4, 4], [0, 0]])   # pad 4 zero pixels on each side -> [40, 40, 3]
    x = tf.image.random_crop(x, [32, 32, 3])
    x = tf.image.random_flip_left_right(x)
    # x: [0, 255] -> [0, 1], then standardize
    x = tf.cast(x, dtype=tf.float32) / 255.
    x = normalize(x)
    y = tf.cast(y, dtype=tf.int32)
    return x, y

# 3. Piecewise learning-rate schedule (same breakpoints as lr_schedule_200ep in utils.py).
def lr_schedule_300ep(epoch):
    if epoch < 60:
        return 0.1
    if epoch < 120:
        return 0.02
    if epoch < 160:
        return 0.004
    if epoch < 200:
        return 0.0008
    if epoch < 250:
        return 0.0003
    if epoch < 300:
        return 0.0001
    else:
        return 0.00006

# Load the dataset.
(x, y), (x_test, y_test) = datasets.cifar100.load_data()
y = tf.squeeze(y)            # or tf.squeeze(y, axis=1) to drop the singleton label dimension
y_test = tf.squeeze(y_test)
print(x.shape, y.shape, x_test.shape, y_test.shape)

# Wrap the training images and labels into a Dataset object.
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(5000).map(preprocess).batch(batchsz)
# Wrap the test images and labels into a Dataset object.
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess).batch(batchsz)

# Take one batch to check the sample shapes and value range.
sample = next(iter(train_db))
print('sample:', sample[0].shape, sample[1].shape,
      tf.reduce_min(sample[0]),
      tf.reduce_max(sample[0]))  # after standardization the values are roughly in [-2, 2]

def main():
    # Input: [b, 32, 32, 3]
    model = ResNet18()
    model.build(input_shape=(None, 32, 32, 3))
    model.summary()

    mydense = layers.Dense(100, activation=None, kernel_regularizer=regularizers.l2(5e-4))
    fc_net = Sequential([mydense])
    fc_net.build(input_shape=(None, 512))
    fc_net.summary()

    optimizer = optimizers.SGD(lr=0.1, momentum=0.9, decay=5e-4)
    variables = model.trainable_variables + fc_net.trainable_variables

    for epoch in range(500):
        for step, (x, y) in enumerate(train_db):
            with tf.GradientTape() as tape:
                out = model(x, training=True)
                avgpool = layers.GlobalAveragePooling2D()(out)
                logits = fc_net(avgpool)
                y_onehot = tf.one_hot(y, depth=100)
                loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True))
                # The kernel_regularizer penalties are collected in model.losses / fc_net.losses.
                loss = loss + tf.add_n(model.losses) + tf.add_n(fc_net.losses)

            # Compute the gradients.
            grads = tape.gradient(loss, variables)
            # Apply the update.
            optimizer.apply_gradients(zip(grads, variables))
            # Adjust the learning rate according to the schedule.
            optimizer.lr = lr_schedule_300ep(epoch)
            # Print every 100 steps.
            if step % 100 == 0:
                print('epoch:', epoch, 'step:', step, 'loss:', float(loss), 'lr:', optimizer.lr.numpy())

        # Evaluate on the test set.
        total_num = 0
        total_correct = 0
        for x, y in test_db:
            out = model(x, training=False)
            avgpool = layers.GlobalAveragePooling2D()(out)
            output = fc_net(avgpool)
            # Predicted class probabilities.
            prob = tf.nn.softmax(output, axis=1)
            pred = tf.argmax(prob, axis=1)  # argmax returns int64, so cast before comparing
            pred = tf.cast(pred, dtype=tf.int32)
            # Compare predictions against the ground truth.
            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)
            total_num += x.shape[0]
            total_correct += int(correct)  # convert to a Python int
        acc = total_correct / total_num
        print('epoch:', epoch, 'test_acc:', acc)
        print('====================================================')

if __name__ == '__main__':
    main()
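Unlike the Part I script, which sums tf.nn.l2_loss over every variable by hand, this version relies on kernel_regularizer and collects the penalties from model.losses and fc_net.losses. A small standalone check of that mechanism (my own illustration):

import tensorflow as tf
from tensorflow.keras import layers, regularizers

# A single regularized layer: its L2 penalty is tracked automatically in layer.losses.
dense = layers.Dense(4, kernel_regularizer=regularizers.l2(5e-4))
_ = dense(tf.ones([1, 8]))                   # build the layer so the kernel exists

auto_penalty = tf.add_n(dense.losses)        # what tf.add_n(model.losses) sums in the script
manual_penalty = 5e-4 * tf.reduce_sum(tf.square(dense.kernel))
print(float(auto_penalty), float(manual_penalty))   # the two values should match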
3.3. Results with the attention mechanism
- As the results below show, adding the attention mechanism improves test accuracy by roughly 0.6 percentage points.
ssh://zhangkf@192.168.136.55:22/home/zhangkf/anaconda3/envs/tf2c/bin/python -u /home/zhangkf/tf/TF1/resnet_my_soft.py
(50000, 32, 32, 3) (50000,) (10000, 32, 32, 3) (10000,)
WARNING:tensorflow:From /home/zhangkf/anaconda3/envs/tf2c/lib/python3.7/site-packages/tensorflow_core/python/data/util/random_seed.py:58: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
sample: (256, 32, 32, 3) (256,) tf.Tensor(-1.8967644, shape=(), dtype=float32) tf.Tensor(2.0227804, shape=(), dtype=float32)
Model: "res_net"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
sequential (Sequential) multiple 1984
_________________________________________________________________
sequential_1 (Sequential) multiple 149320
_________________________________________________________________
sequential_2 (Sequential) multiple 529356
_________________________________________________________________
sequential_4 (Sequential) multiple 2111188
_________________________________________________________________
sequential_6 (Sequential) multiple 8432868
=================================================================
Total params: 11,224,716
Trainable params: 11,215,116
Non-trainable params: 9,600
_________________________________________________________________
Model: "sequential_8"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) multiple 51300
=================================================================
Total params: 51,300
Trainable params: 51,300
Non-trainable params: 0
_________________________________________________________________
epoch: 0 step: 0 loss: 9.862617492675781 lr: 0.1
epoch: 0 step: 100 loss: 8.007781028747559 lr: 0.1
epoch: 0 test_acc: 0.1481
====================================================
epoch: 1 step: 0 loss: 6.917511463165283 lr: 0.1
epoch: 1 step: 100 loss: 6.15842342376709 lr: 0.1
epoch: 1 test_acc: 0.1723
====================================================
epoch: 2 step: 0 loss: 5.508969783782959 lr: 0.1
epoch: 2 step: 100 loss: 4.93193244934082 lr: 0.1
epoch: 2 test_acc: 0.2722
====================================================
epoch: 3 step: 0 loss: 4.648748874664307 lr: 0.1
epoch: 3 step: 100 loss: 4.1789631843566895 lr: 0.1
epoch: 3 test_acc: 0.3446
====================================================
epoch: 4 step: 0 loss: 3.8076817989349365 lr: 0.1
epoch: 4 step: 100 loss: 3.277998208999634 lr: 0.1
epoch: 4 test_acc: 0.3248
====================================================
epoch: 5 step: 0 loss: 3.427459478378296 lr: 0.1
epoch: 5 step: 100 loss: 3.127424955368042 lr: 0.1
epoch: 5 test_acc: 0.3511
====================================================
epoch: 6 step: 0 loss: 2.9850730895996094 lr: 0.1
epoch: 6 step: 100 loss: 2.75416898727417 lr: 0.1
epoch: 6 test_acc: 0.4152
====================================================
epoch: 7 step: 0 loss: 2.8390228748321533 lr: 0.1
epoch: 7 step: 100 loss: 2.6923742294311523 lr: 0.1
epoch: 7 test_acc: 0.4475
====================================================
epoch: 8 step: 0 loss: 2.392704963684082 lr: 0.1
epoch: 8 step: 100 loss: 2.4507105350494385 lr: 0.1
epoch: 8 test_acc: 0.4523
====================================================
epoch: 9 step: 0 loss: 2.269890069961548 lr: 0.1
epoch: 9 step: 100 loss: 2.2282142639160156 lr: 0.1
epoch: 9 test_acc: 0.4468
====================================================
epoch: 10 step: 0 loss: 2.203409433364868 lr: 0.1
epoch: 10 step: 100 loss: 1.9339406490325928 lr: 0.1
epoch: 10 test_acc: 0.5296
====================================================
epoch: 11 step: 0 loss: 2.079651117324829 lr: 0.1
epoch: 11 step: 100 loss: 1.856602668762207 lr: 0.1
epoch: 11 test_acc: 0.4892
====================================================
epoch: 12 step: 0 loss: 1.8215320110321045 lr: 0.1
epoch: 12 step: 100 loss: 1.8682109117507935 lr: 0.1
epoch: 12 test_acc: 0.5193
====================================================
epoch: 13 step: 0 loss: 2.033259153366089 lr: 0.1
epoch: 13 step: 100 loss: 1.8576289415359497 lr: 0.1
epoch: 13 test_acc: 0.5181
====================================================
epoch: 14 step: 0 loss: 1.709835410118103 lr: 0.1
epoch: 14 step: 100 loss: 1.8038767576217651 lr: 0.1
epoch: 14 test_acc: 0.5135
====================================================
epoch: 15 step: 0 loss: 1.7499812841415405 lr: 0.1
epoch: 15 step: 100 loss: 1.7479110956192017 lr: 0.1
epoch: 15 test_acc: 0.4005
====================================================
epoch: 16 step: 0 loss: 1.6853797435760498 lr: 0.1
epoch: 16 step: 100 loss: 1.6007542610168457 lr: 0.1
epoch: 16 test_acc: 0.548
====================================================
epoch: 17 step: 0 loss: 1.6215128898620605 lr: 0.1
epoch: 17 step: 100 loss: 1.6284090280532837 lr: 0.1
epoch: 17 test_acc: 0.5252
====================================================
epoch: 18 step: 0 loss: 1.642221212387085 lr: 0.1
epoch: 18 step: 100 loss: 1.627113938331604 lr: 0.1
epoch: 18 test_acc: 0.5898
====================================================
epoch: 19 step: 0 loss: 1.4609442949295044 lr: 0.1
epoch: 19 step: 100 loss: 1.4904931783676147 lr: 0.1
epoch: 19 test_acc: 0.5581
====================================================
epoch: 20 step: 0 loss: 1.4570353031158447 lr: 0.1
epoch: 20 step: 100 loss: 1.562164068222046 lr: 0.1
epoch: 20 test_acc: 0.558
====================================================
......
......
......
====================================================
epoch: 122 step: 0 loss: 0.30877184867858887 lr: 0.004
epoch: 122 step: 100 loss: 0.31004631519317627 lr: 0.004
epoch: 122 test_acc: 0.7556
====================================================
epoch: 123 step: 0 loss: 0.3107128143310547 lr: 0.004
epoch: 123 step: 100 loss: 0.30925899744033813 lr: 0.004
epoch: 123 test_acc: 0.7553
====================================================
epoch: 124 step: 0 loss: 0.3087705075740814 lr: 0.004
epoch: 124 step: 100 loss: 0.31475159525871277 lr: 0.004
epoch: 124 test_acc: 0.7547
====================================================
epoch: 125 step: 0 loss: 0.3086572587490082 lr: 0.004
epoch: 125 step: 100 loss: 0.31604987382888794 lr: 0.004
epoch: 125 test_acc: 0.7545
====================================================
epoch: 126 step: 0 loss: 0.30836617946624756 lr: 0.004
epoch: 126 step: 100 loss: 0.309495210647583 lr: 0.004
epoch: 126 test_acc: 0.755
====================================================
epoch: 127 step: 0 loss: 0.3072863221168518 lr: 0.004
epoch: 127 step: 100 loss: 0.30815497040748596 lr: 0.004
epoch: 127 test_acc: 0.7544
====================================================
epoch: 128 step: 0 loss: 0.3070422410964966 lr: 0.004
epoch: 128 step: 100 loss: 0.30973508954048157 lr: 0.004
epoch: 128 test_acc: 0.7545
====================================================
epoch: 129 step: 0 loss: 0.3068530857563019 lr: 0.004
epoch: 129 step: 100 loss: 0.30841246247291565 lr: 0.004
epoch: 129 test_acc: 0.7546
====================================================
epoch: 130 step: 0 loss: 0.3065824508666992 lr: 0.004
epoch: 130 step: 100 loss: 0.3062731921672821 lr: 0.004
epoch: 130 test_acc: 0.7546
====================================================
epoch: 131 step: 0 loss: 0.30787158012390137 lr: 0.004
epoch: 131 step: 100 loss: 0.30552181601524353 lr: 0.004
epoch: 131 test_acc: 0.7557
====================================================
epoch: 132 step: 0 loss: 0.3076384663581848 lr: 0.004
epoch: 132 step: 100 loss: 0.3065154552459717 lr: 0.004
epoch: 132 test_acc: 0.7546
====================================================
epoch: 133 step: 0 loss: 0.3064778745174408 lr: 0.004
epoch: 133 step: 100 loss: 0.30628499388694763 lr: 0.004
epoch: 133 test_acc: 0.755
====================================================
epoch: 134 step: 0 loss: 0.30591851472854614 lr: 0.004
epoch: 134 step: 100 loss: 0.30593985319137573 lr: 0.004
epoch: 134 test_acc: 0.7549
====================================================
epoch: 135 step: 0 loss: 0.3065711259841919 lr: 0.004
epoch: 135 step: 100 loss: 0.30555734038352966 lr: 0.004
epoch: 135 test_acc: 0.7546
====================================================
epoch: 136 step: 0 loss: 0.305118590593338 lr: 0.004
epoch: 136 step: 100 loss: 0.3063117563724518 lr: 0.004
epoch: 136 test_acc: 0.7551
====================================================
epoch: 137 step: 0 loss: 0.30526748299598694 lr: 0.004
epoch: 137 step: 100 loss: 0.30621927976608276 lr: 0.004
epoch: 137 test_acc: 0.7548
====================================================
epoch: 138 step: 0 loss: 0.30694591999053955 lr: 0.004
epoch: 138 step: 100 loss: 0.3033943772315979 lr: 0.004
epoch: 138 test_acc: 0.7548
====================================================
epoch: 139 step: 0 loss: 0.30369484424591064 lr: 0.004
epoch: 139 step: 100 loss: 0.3046955466270447 lr: 0.004
epoch: 139 test_acc: 0.7554
====================================================
......
......
......
====================================================
epoch: 160 step: 0 loss: 0.2971763610839844 lr: 0.0008
epoch: 160 step: 100 loss: 0.2977881133556366 lr: 0.0008
epoch: 160 test_acc: 0.7556
====================================================
epoch: 161 step: 0 loss: 0.2974894642829895 lr: 0.0008
epoch: 161 step: 100 loss: 0.2992340326309204 lr: 0.0008
epoch: 161 test_acc: 0.7557
====================================================
epoch: 162 step: 0 loss: 0.2972643971443176 lr: 0.0008
epoch: 162 step: 100 loss: 0.29837867617607117 lr: 0.0008
epoch: 162 test_acc: 0.7558
====================================================
epoch: 163 step: 0 loss: 0.3012158274650574 lr: 0.0008
epoch: 163 step: 100 loss: 0.299969345331192 lr: 0.0008
epoch: 163 test_acc: 0.7559
====================================================
epoch: 164 step: 0 loss: 0.29745134711265564 lr: 0.0008
epoch: 164 step: 100 loss: 0.2974349558353424 lr: 0.0008
epoch: 164 test_acc: 0.7558
====================================================
epoch: 165 step: 0 loss: 0.29991328716278076 lr: 0.0008
epoch: 165 step: 100 loss: 0.29844990372657776 lr: 0.0008
epoch: 165 test_acc: 0.7559
====================================================
epoch: 166 step: 0 loss: 0.2979045808315277 lr: 0.0008
epoch: 166 step: 100 loss: 0.2974371910095215 lr: 0.0008
epoch: 166 test_acc: 0.7555
====================================================
epoch: 167 step: 0 loss: 0.29838019609451294 lr: 0.0008
epoch: 167 step: 100 loss: 0.2982860505580902 lr: 0.0008
epoch: 167 test_acc: 0.7559
====================================================
epoch: 168 step: 0 loss: 0.2991889417171478 lr: 0.0008
epoch: 168 step: 100 loss: 0.2982766032218933 lr: 0.0008
epoch: 168 test_acc: 0.7559
====================================================
epoch: 169 step: 0 loss: 0.2983883023262024 lr: 0.0008
epoch: 169 step: 100 loss: 0.2980871796607971 lr: 0.0008
epoch: 169 test_acc: 0.7559
====================================================
epoch: 170 step: 0 loss: 0.2980022430419922 lr: 0.0008
epoch: 170 step: 100 loss: 0.30145472288131714 lr: 0.0008
epoch: 170 test_acc: 0.7559
====================================================
epoch: 171 step: 0 loss: 0.30029699206352234 lr: 0.0008
epoch: 171 step: 100 loss: 0.297269344329834 lr: 0.0008
epoch: 171 test_acc: 0.7551
====================================================
epoch: 172 step: 0 loss: 0.2982468008995056 lr: 0.0008
epoch: 172 step: 100 loss: 0.29756346344947815 lr: 0.0008
epoch: 172 test_acc: 0.7559
====================================================
epoch: 173 step: 0 loss: 0.2984648048877716 lr: 0.0008
epoch: 173 step: 100 loss: 0.2978881299495697 lr: 0.0008
epoch: 173 test_acc: 0.7559
====================================================
epoch: 174 step: 0 loss: 0.2976265549659729 lr: 0.0008
epoch: 174 step: 100 loss: 0.2978631258010864 lr: 0.0008
epoch: 174 test_acc: 0.7556
====================================================
epoch: 175 step: 0 loss: 0.29755985736846924 lr: 0.0008
epoch: 175 step: 100 loss: 0.30176782608032227 lr: 0.0008
epoch: 175 test_acc: 0.7554
====================================================
epoch: 176 step: 0 loss: 0.2987819015979767 lr: 0.0008
epoch: 176 step: 100 loss: 0.2976752817630768 lr: 0.0008
epoch: 176 test_acc: 0.7553
====================================================
epoch: 177 step: 0 loss: 0.299359530210495 lr: 0.0008
epoch: 177 step: 100 loss: 0.30031245946884155 lr: 0.0008
epoch: 177 test_acc: 0.7557
====================================================
epoch: 178 step: 0 loss: 0.2971976101398468 lr: 0.0008
epoch: 178 step: 100 loss: 0.2986002266407013 lr: 0.0008
epoch: 178 test_acc: 0.7557
====================================================
epoch: 179 step: 0 loss: 0.2972680330276489 lr: 0.0008
epoch: 179 step: 100 loss: 0.2961897552013397 lr: 0.0008
epoch: 179 test_acc: 0.7558
====================================================
epoch: 180 step: 0 loss: 0.29784226417541504 lr: 0.0008
epoch: 180 step: 100 loss: 0.297781765460968 lr: 0.0008
epoch: 180 test_acc: 0.7557
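From the log above, the test accuracy plateaus at roughly 0.755–0.756 once the learning rate drops to 0.0008, which matches the ~75% CIFAR-100 result claimed for ResNet-18 here. As a convenience only (this helper is not part of the original training code, and the file name train.log is an assumption), a minimal sketch for scanning a saved copy of this console output and reporting the best per-epoch test accuracy could look like this:

import re

def best_test_acc(log_path):
    """Scan a training log in the format printed above
    ('epoch: N test_acc: X') and return (best_epoch, best_acc).
    Lines that report loss/lr per step are ignored by the pattern."""
    pattern = re.compile(r'epoch:\s*(\d+)\s+test_acc:\s*([\d.]+)')
    best_epoch, best_acc = None, 0.0
    with open(log_path) as f:
        for line in f:
            m = pattern.search(line)
            if m:
                epoch, acc = int(m.group(1)), float(m.group(2))
                if acc > best_acc:
                    best_epoch, best_acc = epoch, acc
    return best_epoch, best_acc

# Example usage, assuming the console output was redirected to train.log:
# print(best_test_acc('train.log'))   # e.g. an epoch in the 160s with acc ~0.7559 for the run above

The same idea can be applied inside the training loop itself: keep a running best_acc and only save model weights when the epoch's test accuracy improves, so the checkpoint on disk always corresponds to the peak of the plateau rather than the final epoch.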