[Practical Guide] Batch Normalization: How to Train Deep Neural Networks Faster
- reuse=1 loads the previously trained model from disk
- reuse=2 builds the model with Batch Normalization
- reuse=3 builds the same model without Batch Normalization
As the comparison figure above shows, Batch Normalization speeds up network training.
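For reference, during training BatchNormalization standardizes each feature to zero mean and unit variance over the current mini-batch, then applies a learned scale gamma and shift beta (at inference it uses running averages instead). Below is a minimal NumPy sketch of the training-time transform; the batch_norm helper is illustrative only and is not part of the script that follows (the eps value mirrors Keras' default epsilon of 1e-3):

import numpy as np

def batch_norm(x, gamma, beta, eps=1e-3):
    """Training-time batch normalization over the batch axis (illustrative)."""
    mu = x.mean(axis=0)                     # per-feature batch mean
    var = x.var(axis=0)                     # per-feature batch variance
    x_hat = (x - mu) / np.sqrt(var + eps)   # normalize to ~zero mean, unit variance
    return gamma * x_hat + beta             # learned rescale and shift

x = 5 * np.random.randn(128, 10) + 3        # a batch of 128 samples, 10 features
y = batch_norm(x, gamma=np.ones(10), beta=np.zeros(10))
print(y.mean(axis=0).round(3))              # ~0 for every feature
print(y.std(axis=0).round(3))               # ~1 for every feature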
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 20 20:51:12 2018

Architecture (matching the code below):
# 1: Input shape 16x16x2
# 2: Output shape 10x1
# 3: C1 = [5,5,2,10]  stride = 1, padding = valid, activation = relu
# 4: C2 = [4,4,10,16] stride = 2, padding = valid, activation = relu
# 5: C3 = [4,4,16,12] stride = 1, padding = valid, activation = relu
# 6: softmax
# 7: recorded runs:
#    seed = 4  A→C: 87%
#    seed = 89 A→C: 88%
#    seed = 92 A→C: 90% + Dropout 0.25
#    seed = 87 A→C: 95% + Dropout 0.25

@author: brucelau
"""
import numpy as np
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, Activation, BatchNormalization
from sklearn.model_selection import StratifiedShuffleSplit
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus']=False
import argparse
import os
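# expose only the GPU with device index 1; must be set before TensorFlow initializes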
os.environ["CUDA_VISIBLE_DEVICES"]="1"
parser = argparse.ArgumentParser(description='Setting model parameters')
parser.add_argument('--SNR', default=-2, type=int, help='Set the SNR value from [-4,-2,0,2,4,6,8,10,1000]')
parser.add_argument('--train_set', default='C', type=str, help='Set the training dataset')
parser.add_argument('--test_set', default='A', type=str, help='Set the testing dataset')
parser.add_argument('--order', default=1, type=int, help='The runtime order')
args = parser.parse_args()
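# Example invocation (the script filename here is hypothetical):
#   python train_cnn.py --SNR -2 --train_set C --test_set A --order 1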
SNR = args.SNR
order = args.order
#SNR = -4
#train_set = 'C'
#test_set = 'A'
train_set = args.train_set
test_set = args.test_set
if SNR == 1000:
    SNR = 996  # noise-free case: remap so that SNR+4 below resolves to data/noise/level_1000
#%%
def training_vis(hist):
    loss = hist.history['loss']
    val_loss = hist.history['val_loss']
    # note: newer Keras versions store these under 'accuracy'/'val_accuracy'
    acc = hist.history['acc']
    val_acc = hist.history['val_acc']
    # make a figure
    fig = plt.figure(figsize=(8, 4))
    # subplot: loss
    ax1 = fig.add_subplot(121)
    ax1.plot(loss, label='training loss')
    ax1.plot(val_loss, label='validation loss')
    ax1.set_xlabel('epochs')
    ax1.set_ylabel('loss')
    ax1.set_title('loss on the training and validation sets')
    ax1.legend()
    ax1.grid()
    # subplot: accuracy
    ax2 = fig.add_subplot(122)
    ax2.plot(acc, label='training accuracy')
    ax2.plot(val_acc, label='validation accuracy')
    ax2.set_xlabel('epochs')
    ax2.set_ylabel('diagnosis accuracy')
    ax2.set_title('accuracy on the training and validation sets')
    ax2.legend()
    ax2.grid()
    plt.tight_layout()
    # save before plt.show(): some backends clear the figure on show()
    plt.savefig('train.png')
    plt.show()
#%%
batch_size = 128
num_classes = 10
epochs = 100
patience = 100  # EarlyStopping patience; equal to epochs, so training is never cut short
k1 = 10         # filters in the first convolutional layer
k2 = 16         # filters in the second convolutional layer
k3 = 12         # filters in the third convolutional layer
# input image dimensions (TensorFlow channels-last ordering)
img_rows, img_cols = 16, 16
input_shape = (img_rows, img_cols, 2)
# load the train and test data, reshaped to (N, 16, 16, 2)
X_train = np.load('data/noise/level_%d/train_%s.npy' % (SNR+4, train_set)).reshape([-1, 16, 16, 2])
y_train = np.load('data/noise/level_%d/labels_train.npy' % (SNR+4))
X_test = np.load('data/noise/level_%d/test_%s.npy' % (SNR+4, test_set)).reshape([-1, 16, 16, 2])
y_test = np.load('data/noise/level_%d/labels_test.npy' % (SNR+4))
seed = np.random.randint(0, 100)  # random split seed; notable values are recorded in the docstring above
# split the training data into train/validation parts; the stratified split
# keeps the class proportions identical in both subsets
train_idx, val_idx = next(iter(
        StratifiedShuffleSplit(n_splits=1, test_size=0.2,
                               random_state=seed).split(X_train, y_train)))
X_train_train = X_train[train_idx]
y_train_train = y_train[train_idx]
X_train_val = X_train[val_idx]
y_train_val = y_train[val_idx]
# convert class vectors to binary class matrices
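# to_categorical produces one-hot rows, e.g. (illustrative):
#   keras.utils.to_categorical([1, 3], 4) -> [[0,1,0,0], [0,0,0,1]]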
y_train_train = keras.utils.to_categorical(y_train_train, num_classes)
y_train_val = keras.utils.to_categorical(y_train_val, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
#%%
# building keras model
reuse = 2
if reuse == 1:
    # reuse = 1: load the previously trained model from disk
    model = load_model('model/gear_model.h5')
    print('Using the trained model.')
elif reuse == 2:
    # reuse = 2: CNN with Batch Normalization before each activation
    print('Using the untrained model')
    model = Sequential()
    model.add(Conv2D(k1, kernel_size=(5, 5), input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(Conv2D(k2, kernel_size=(4, 4), strides=(2, 2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(Conv2D(k3, kernel_size=(4, 4), strides=(1, 1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(200))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
else:
    # reuse = 3: the same architecture without Batch Normalization, as a baseline
    print('The third model')
    model = Sequential()
    model.add(Conv2D(k1, kernel_size=(5, 5), activation='relu', input_shape=input_shape, name='my-Conv2d-1'))
    model.add(Conv2D(k2, kernel_size=(4, 4), activation='relu', strides=(2, 2), name='my-Conv2d-2'))
    model.add(Dropout(0.25))
    model.add(Conv2D(k3, kernel_size=(4, 4), activation='relu', strides=(1, 1), name='my-Conv2d-3'))
    model.add(Dropout(0.25))
    model.add(Flatten(name='my-Flatten'))
    model.add(Dense(200, activation='relu', name='my-Dense1'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax', name='my-Dense2'))
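# Design note: in the reuse = 2 branch, BatchNormalization sits between the
# convolution and the ReLU, i.e. the pre-activations are normalized, following
# the placement proposed by Ioffe & Szegedy (2015); normalizing after the
# activation is also common in practice, but is not what this script does.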
#%%
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
# EarlyStopping monitors val_loss by default; with patience equal to epochs
# it effectively never interrupts training here
hist = model.fit(X_train_train, y_train_train,
                 batch_size=batch_size,
                 epochs=epochs,
                 verbose=1,
                 validation_data=(X_train_val, y_train_val),
                 callbacks=[EarlyStopping(patience=patience, mode='min', verbose=0)])
# evaluate on the held-out test set; evaluate() returns [loss, accuracy],
# following the metrics declared in compile()
score = model.evaluate(X_test, y_test, verbose=0)
loss = score[0]
accuracy = score[1]
print('Test loss:', loss)
print('Test accuracy:', accuracy)
#%% TensorBoard check (note: nothing in this script currently logs to Graph/)
# tensorboard --logdir Graph/
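# For the command above to show anything, a TensorBoard callback would have to
# be passed to fit(): a minimal sketch, assuming Graph/ as the log directory
# (not wired into this script):
#   tb_cb = keras.callbacks.TensorBoard(log_dir='Graph')
#   hist = model.fit(..., callbacks=[tb_cb, EarlyStopping(patience=patience, mode='min')])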
training_vis(hist)
#%%
#model.save('data/model/level_%d/%s%s_%d_acc_%d_%d.h5'%(SNR+4,train_set,test_set,SNR+4,order,int(accuracy*100000)))
model.save('model/gear_model.h5')
#%%
#from keras.models import load_model
#model = load_model('data/model/level_0/CA_0_acc_0.752400.h5')
#score = model.evaluate(X_test, y_test, verbose=0)
#loss = score[0]
#accuracy = score[1]
#print('Test loss:', loss)
#print('Test accuracy:', accuracy)