- One-hot encoding: [0, …, 0, 1, 0, …, 0]
- Forward pass: three layers; each layer's output is the next layer's input
- Backward pass: partial derivatives of the output (loss) with respect to the parameters (see the sketch below)
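A minimal sketch of the one-hot and backward bullets, assuming a hypothetical label 3 and a single scalar weight w chosen only for illustration: tf.one_hot builds the target vector, and tf.GradientTape records the forward pass so the derivative with respect to the parameter can be read back.
import tensorflow as tf
# one-hot: label 3 with depth 10 -> [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
print(tf.one_hot(3, depth=10).numpy())
# backward: the tape records the forward computation, then returns d(out)/dw
w = tf.Variable(2.0)
with tf.GradientTape() as tape:
    out = w * w                        # forward
print(tape.gradient(out, w).numpy())   # 2*w = 4.0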
Code:
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
# Configure how the GPU is used
# Get the list of GPUs
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Enable memory growth so GPU memory is allocated on demand
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Print the exception
        print(e)
(xs, ys),_ = datasets.mnist.load_data()
print('datasets:', xs.shape, ys.shape, xs.min(), xs.max())
batch_size = 32
xs = tf.convert_to_tensor(xs, dtype=tf.float32) / 255.  # preprocessing: scale pixels to [0, 1]
db = tf.data.Dataset.from_tensor_slices((xs, ys))
db = db.batch(batch_size).repeat(30)  # 30 epochs
model = Sequential([layers.Dense(256, activation='relu'),  # fully connected layer, ReLU activation, [4, 784] -> [4, 256]; the argument is the output dimension
                    layers.Dense(128, activation='relu'),
                    layers.Dense(10)])
model.build(input_shape=(4, 28*28))  # input specification
model.summary()  # print the network structure
optimizer = optimizers.SGD(lr=0.01)  # SGD: stochastic gradient descent optimizer, learning rate 0.01
acc_meter = metrics.Accuracy()  # running accuracy meter: update_state() accumulates predictions, result() returns the accuracy since the last reset
for step, (x, y) in enumerate(db):
    with tf.GradientTape() as tape:  # record the forward pass
        # flatten, [b, 28, 28] => [b, 784]
        x = tf.reshape(x, (-1, 28*28))
        # Step 1. get the model output, [b, 784] => [b, 10]
        out = model(x)  # feed x through the network
        # [b] => [b, 10]
        y_onehot = tf.one_hot(y, depth=10)  # convert y to one-hot
        # element-wise squared error, [b, 10]
        loss = tf.square(out - y_onehot)
        # mean error per sample (scalar)
        loss = tf.reduce_sum(loss) / x.shape[0]
    acc_meter.update_state(tf.argmax(out, axis=1), y)  # update the running accuracy
    grads = tape.gradient(loss, model.trainable_variables)  # compute the gradients
    optimizer.apply_gradients(zip(grads, model.trainable_variables))  # apply the gradients to update the weights in place
    if step % 200 == 0:
        print(step, 'loss:', float(loss), 'acc:', acc_meter.result().numpy())
        acc_meter.reset_states()
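Note on the UserWarning in the output below: the `lr` argument of optimizers.SGD is deprecated, and the current Keras API spells it `learning_rate`. A drop-in replacement for the optimizer line above would be:
optimizer = optimizers.SGD(learning_rate=0.01)  # same 0.01 step size, without the deprecation warning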
Output:
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11493376/11490434 [==============================] - 0s 0us/step
11501568/11490434 [==============================] - 0s 0us/step
datasets: (60000, 28, 28) (60000,) 0 255
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (4, 256) 200960
dense_1 (Dense) (4, 128) 32896
dense_2 (Dense) (4, 10) 1290
=================================================================
Total params: 235,146
Trainable params: 235,146
Non-trainable params: 0
_________________________________________________________________
/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/gradient_descent.py:102: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
super(SGD, self).__init__(name, **kwargs)
0 loss: 2.154576063156128 acc: 0.0625
200 loss: 0.4591897130012512 acc: 0.6734375
400 loss: 0.4148889183998108 acc: 0.84328127
600 loss: 0.33673521876335144 acc: 0.8603125
800 loss: 0.2931180000305176 acc: 0.8907812
…… (intermediate steps omitted for brevity)
54200 loss: 0.021416034549474716 acc: 0.98046875
54400 loss: 0.028198830783367157 acc: 0.98859376
54600 loss: 0.0634356364607811 acc: 0.9865625
54800 loss: 0.019117143005132675 acc: 0.9870312
55000 loss: 0.07670075446367264 acc: 0.985625
55200 loss: 0.049966394901275635 acc: 0.98625
55400 loss: 0.0822611153125763 acc: 0.98375
55600 loss: 0.020873377099633217 acc: 0.98578125
55800 loss: 0.11799027025699615 acc: 0.9840625
56000 loss: 0.045944154262542725 acc: 0.98109376
56200 loss: 0.038981664925813675 acc: 0.9870312