人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估-CFANZ编程社区

一、 VGG 网络对人脸识别数据集

使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估。

数据集为 2019 年上传的十个中国明星的人脸图片（仅供学习使用，不得用于其他用途）。

按照深度学习的传统，我们将训练集和测试集按照 9:1 的比例进行划分。对于自定义的数据集，首先要生成图像列表，把自定义的图像分为测试集和训练集，并带上标签。下面的程序可以单独运行，只要把一个大类的文件夹路径传进去就可以了，该程序会遍历里面的每个小类别，生成固定格式的列表。比如我们把人脸类别的根目录 ../images/face 传进去，最后会在指定目录下面生成三个文件：readme.json、trainer.list 和 test.list。

VGG的核心是五组卷积操作，每两组之间做Max-Pooling空间降维。同一组内采用多次连续的3X3卷积，卷积核的数目由较浅组的64增多到最深组的512，同一组内的卷积核数目是一样的。卷积之后接两层全连接层，之后是分类层。由于每组内卷积层的不同，有11、13、16、19层这几种模型，上图展示一个16层的网络结构。

二、操作步骤

1.数据准备

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_paddle

import paddle
import paddle.fluid as fluid
from multiprocessing import cpu_count
import math
from sklearn.metrics import confusion_matrix
import pandas as pd

import os
import zipfile
import random
import json
import paddle
import sys
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from paddle.io import Dataset
import paddle.nn.functional as F
print("本教程基于Paddle的版本号为："+paddle.__version__)

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_paddle_02

# Global configuration shared by the data-preparation and training steps.
train_parameters = dict(
    input_size=[3, 224, 224],                         # shape of one input image
    class_dim=10,                                     # number of classes
    target_path="/home/aistudio/",                    # directory the data is extracted to
    train_list_path="/home/aistudio/train.txt",       # path of train.txt
    eval_list_path="/home/aistudio/eval.txt",         # path of eval.txt
    readme_path="/home/aistudio/readme.json",         # path of readme.json
    label_dict={},                                    # label -> class-name mapping
    num_epochs=5,                                     # number of training epochs
    train_batch_size=16,                              # samples per training batch
    learning_strategy=dict(                           # optimizer-related settings
        lr=0.001,                                     # learning-rate hyper-parameter
    ),
)

构建数据集

（1）按照比例划分训练集与验证集；

（2）乱序，生成数据列表；

（3）构造训练数据集提供器和验证数据集提供器。

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_03

def get_data_list(target_path,train_list_path,eval_list_path):
    '''
    Generate the train/eval image lists and the readme.json summary.

    Every sub-directory of ``target_path + "images/face/"`` is one class and
    receives the next integer label, in ``os.listdir`` order. Inside a class,
    the images at positions 0, 10, 20, ... go to the eval list and all others
    go to the train list. Both lists are shuffled before being written, one
    ``<image path>\\t<label>`` entry per line.

    Args:
        target_path: directory (with trailing slash) containing ``images/face/``.
        train_list_path: file that receives the training entries (overwritten).
        eval_list_path: file that receives the evaluation entries (overwritten).

    Side effects:
        Rewrites ``train_parameters['label_dict']`` (label string -> class
        name) and ``train_parameters['class_dim']``, and writes the summary to
        ``train_parameters['readme_path']``.
    '''
    data_list_path = target_path + "images/face/"
    # Per-class statistics that end up in readme.json
    class_detail = []
    # Lines destined for train.txt / eval.txt
    trainer_list = []
    eval_list = []
    # Total number of images over all classes
    all_class_images = 0
    # Label handed to the next class; equals the class count once the loop ends
    class_label = 0
    # Drop mappings left over from an earlier run (e.g. with more classes)
    train_parameters['label_dict'].clear()

    for class_dir in os.listdir(data_list_path):
        path = data_list_path + class_dir
        # Only directories are classes: a stray file (.DS_Store, a README, ...)
        # would otherwise make os.listdir(path) below raise.
        if class_dir == ".DS_Store" or not os.path.isdir(path):
            continue

        eval_sum = 0
        trainer_sum = 0
        # Number of images seen so far in this class
        class_sum = 0
        for img_path in os.listdir(path):
            name_path = path + '/' + img_path
            # Hidden files and nested directories are not images
            if img_path.startswith('.') or not os.path.isfile(name_path):
                continue
            entry = name_path + "\t%d" % class_label + "\n"
            if class_sum % 10 == 0:
                # Every 10th image is held out for evaluation
                eval_sum += 1
                eval_list.append(entry)
            else:
                trainer_sum += 1
                trainer_list.append(entry)
            class_sum += 1
            all_class_images += 1

        class_detail.append({
            'class_name': class_dir,               # class name, e.g. jiangwen
            'class_label': class_label,            # integer label of the class
            'class_eval_images': eval_sum,         # images held out for evaluation
            'class_trainer_images': trainer_sum,   # images used for training
        })
        train_parameters['label_dict'][str(class_label)] = class_dir
        class_label += 1

    train_parameters['class_dim'] = class_label

    # Shuffle, then overwrite the list files. Writing in 'w' mode (rather than
    # appending) keeps a repeated call from duplicating every entry.
    random.shuffle(eval_list)
    with open(eval_list_path, 'w') as f:
        f.writelines(eval_list)

    random.shuffle(trainer_list)
    with open(train_list_path, 'w') as f2:
        f2.writelines(trainer_list)

    # Summary written to readme.json
    readjson = {}
    readjson['all_class_name'] = data_list_path                  # directory that was scanned
    readjson['all_class_images'] = all_class_images
    readjson['class_detail'] = class_detail
    jsons = json.dumps(readjson, sort_keys=True, indent=4, separators=(',', ': '))
    with open(train_parameters['readme_path'],'w') as f:
        f.write(jsons)
    print ('生成数据列表完成！')

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_json_04

# Pull the paths and the batch size out of the shared configuration.
target_path, train_list_path, eval_list_path, batch_size = (
    train_parameters[key]
    for key in ('target_path', 'train_list_path', 'eval_list_path', 'train_batch_size')
)

# Split into train/eval sets, shuffle, and write the list files.
# Both list files are emptied first, before each regeneration.
for list_path in (train_list_path, eval_list_path):
    with open(list_path, 'w') as f:
        f.seek(0)
        f.truncate()

# Generate the data lists
get_data_list(target_path, train_list_path, eval_list_path)

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_05

class MyDataset(paddle.io.Dataset):
    """
    In-memory image classification dataset (step 1: subclass paddle.io.Dataset).

    The constructor reads a list file whose lines are ``<image path>\\t<label>``
    and decodes every image up front, so indexing afterwards is a plain list
    lookup.
    """
    def __init__(self, mode='train'):
        """
        Step 2: load the samples.

        Args:
            mode: ``'train'`` reads ``train_list_path``; any other value reads
                ``eval_list_path``.
        """
        super(MyDataset, self).__init__()
        self.data = []
        self.label = []
        list_path = train_list_path if mode == 'train' else eval_list_path
        with open(list_path, 'r') as f:
            for line in f:
                line = line.strip()
                # A blank (e.g. trailing) line would break the two-field unpack below
                if not line:
                    continue
                img_path, lab = line.split('\t')
                self.data.append(self._load_image(img_path))
                self.label.append(int(lab))

    @staticmethod
    def _load_image(img_path):
        """Read one image as a float32 CHW array of shape (3, 224, 224) scaled to [0, 1]."""
        # The context manager closes the underlying file once the pixels have
        # been copied into the resized image.
        with Image.open(img_path) as img:
            if img.mode != 'RGB':
                img = img.convert('RGB')
            img = img.resize((224, 224), Image.BILINEAR)
        img = np.array(img).astype('float32')
        img = img.transpose((2, 0, 1))  # HWC to CHW
        return img / 255                # normalise pixel values

    def __getitem__(self, index):
        """
        Step 3: return the single sample at ``index`` as (image, label).
        """
        data = self.data[index]
        label = self.label[index]
        # The label is returned as an int64 array
        return data, np.array(label).astype('int64')

    def __len__(self):
        """
        Step 4: return the number of samples in the dataset.
        """
        return len(self.data)

测试数据集

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_06

# Smoke-test the dataset class on both splits
train_dataset = MyDataset(mode='train')
eval_dataset = MyDataset(mode='val')

print('=============train_dataset =============')
# Shape and label of the training sample at index 1
print(train_dataset[1][0].shape, train_dataset[1][1])
# Number of training samples
print(len(train_dataset))

print('=============eval_dataset =============')
# Shape and label of the first evaluation sample only
for data, label in eval_dataset:
    print(data.shape, label)
    break
# Number of evaluation samples
print(len(eval_dataset))

2.模型配置

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_07

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_人脸识别_08

class ConvPool(paddle.nn.Layer):
    '''
    A stack of ``groups`` Conv2D+ReLU layers followed by one pooling layer.

    Args:
        in_channels: channels of the input feature map.
        out_channels: number of filters of every convolution in the stack.
        filter_size: convolution kernel size.
        pool_size: pooling kernel size.
        pool_stride: pooling stride.
        groups: number of consecutive convolutions.
        conv_stride: convolution stride (default 1).
        conv_padding: convolution padding (default 1).
        pool_type: ``'max'`` (default) or ``'avg'``.

    Raises:
        ValueError: if ``pool_type`` is neither ``'max'`` nor ``'avg'``.
    '''
    def __init__(self,
                 in_channels,
                 out_channels,
                 filter_size,
                 pool_size,
                 pool_stride,
                 groups,
                 conv_stride=1,
                 conv_padding=1,
                 pool_type='max'
                 ):
        super(ConvPool, self).__init__()

        self._conv2d_list = []

        for i in range(groups):
            # add_sublayer registers the layer under the given name and returns it
            conv2d = self.add_sublayer(
                'bb_%d' % i,
                paddle.nn.Conv2D(in_channels=in_channels,
                                 out_channels=out_channels,
                                 kernel_size=filter_size,
                                 stride=conv_stride,      # was accepted but silently ignored before
                                 padding=conv_padding)
            )

            # Every convolution after the first consumes the previous one's output
            in_channels = out_channels
            self._conv2d_list.append(conv2d)

        if pool_type == 'avg':
            self._pool2d = paddle.nn.AvgPool2D(
                kernel_size=pool_size,    # pooling kernel size
                stride=pool_stride        # pooling stride
            )
        elif pool_type == 'max':
            self._pool2d = paddle.nn.MaxPool2D(
                kernel_size=pool_size,    # pooling kernel size
                stride=pool_stride        # pooling stride
            )
        else:
            # Fail at construction instead of with a missing attribute in forward()
            raise ValueError(
                "pool_type must be 'max' or 'avg', got %r" % (pool_type,))

    def forward(self, inputs):
        '''Apply each convolution followed by ReLU, then the pooling layer.'''
        x = inputs
        for conv in self._conv2d_list:
            x = conv(x)
            x = F.relu(x)
        x = self._pool2d(x)
        return x




class VGGNet(paddle.nn.Layer):
    '''
    VGG-16 style network: five ConvPool stages followed by three linear layers.

    Args:
        num_classes: size of the output layer (default 10).

    The forward pass returns raw class scores (logits), not probabilities.
    paddle.nn.CrossEntropyLoss applies softmax itself by default, so applying
    softmax here as well would normalise the scores twice and flatten the
    training signal. Callers that only take ``argmax`` of the output are
    unaffected.
    '''
    def __init__(self, num_classes=10):
        super(VGGNet, self).__init__()

        # Arguments: in channels, filters, kernel size, pool size, pool stride,
        # number of consecutive convolutions
        self.convpool01 = ConvPool(
            3, 64, 3, 2, 2,2)
        self.convpool02 = ConvPool(
            64, 128, 3, 2, 2,2)
        self.convpool03 = ConvPool(
            128, 256, 3, 2, 2,3)
        self.convpool04 = ConvPool(
            256, 512, 3, 2, 2,3)
        self.convpool05 = ConvPool(
            512, 512, 3, 2, 2,3)

        # Flattened size of the last stage's output for a 224x224 input
        self.pool_5_shape = 512 * 7* 7
        self.fc01 = paddle.nn.Linear(self.pool_5_shape,4096)
        self.fc02 = paddle.nn.Linear(4096,4096)
        self.fc03 = paddle.nn.Linear(4096,num_classes)

    def forward(self, inputs):
        """Forward pass; the shape comments assume a [8, 3, 224, 224] input batch."""
        out = self.convpool01(inputs)   # [8, 64, 112, 112]
        out = self.convpool02(out)      # [8, 128, 56, 56]
        out = self.convpool03(out)      # [8, 256, 28, 28]
        out = self.convpool04(out)      # [8, 512, 14, 14]
        out = self.convpool05(out)      # [8, 512, 7, 7]

        out = paddle.reshape(out, shape=[-1, self.pool_5_shape])
        out = self.fc01(out)
        out = F.relu(out)
        out = self.fc02(out)
        out = F.relu(out)
        # No softmax here: the loss function applies it (see class docstring)
        out = self.fc03(out)

        return out

3.模型训练及模型评估

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_人脸识别_09

# ---- Model training ----
print('model')
model = paddle.Model(VGGNet())

# Optimizer, loss function and metric
print('prepare')
adam_optimizer = paddle.optimizer.Adam(parameters=model.parameters())
loss_fn = paddle.nn.CrossEntropyLoss()
accuracy_metric = paddle.metric.Accuracy()
model.prepare(adam_optimizer, loss_fn, accuracy_metric)

# VisualDL callback for training visualisation
visualdl = paddle.callbacks.VisualDL(log_dir='visualdl_log')

# Run the full training loop
print('fit')
model.fit(
    train_dataset,              # training dataset
    eval_dataset,               # evaluation dataset
    epochs=3,                   # total number of epochs
    batch_size=16,              # samples per batch
    shuffle=True,               # shuffle the samples
    verbose=1,                  # log display format
    save_dir='./chk_points/',   # where the checkpoints are stored
    callbacks=[visualdl],       # callbacks to run
)

# Save the model
model.save('model_save_dir')

4.模型预测

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_10

# Class names indexed by integer label. Labels are assigned by get_data_list()
# in directory-listing order, so the names are taken from the mapping it
# recorded rather than from a hand-written list whose order might not match.
label_dict = train_parameters['label_dict']
label_list = [label_dict[str(label)] for label in range(len(label_dict))]

print('测试数据集样本量：{}'.format(len(eval_dataset)))

# Run inference on the evaluation dataset
result = model.predict(eval_dataset)

# Show a fixed handful of samples, skipping indices the dataset does not have
indexs = [idx for idx in [1, 2, 3, 4, 5, 6, 7] if idx < len(eval_dataset)]

for idx in indexs:
    predict_label = int(np.argmax(result[0][idx]))
    real_label = int(eval_dataset[idx][1])
    print('样本ID：{}, 真实标签：{}, 预测值：{}'.format(idx, label_list[real_label], label_list[predict_label]))

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_人脸识别_11

# Run inference on the whole evaluation dataset
result = model.predict(eval_dataset)

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_12

# Show predictions for a fixed handful of evaluation samples (indices 1..7)
indexs = list(range(1, 8))

for idx in indexs:
    real_label = eval_dataset[idx][1]
    predict_label = np.argmax(result[0][idx])
    print('样本ID：{}, 真实标签：{}, 预测值：{}'.format(idx, label_list[real_label], label_list[predict_label]))

三、结果与分析

如图所示，VGG模型对于人脸识别，精确率非常低，且识别七个人，成功的数量只有一个，这结果是极其不理想的：

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_json_13

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_paddle_14

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_json_15

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_json_16

当将参数改变为：

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_17

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_人脸识别_18

识别准确率0.1449；但实际测试效果还是不理想；

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_19

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_数据_20

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_人脸识别_21

当分类数设置为20，精度提高到0.2101；但识别结果还是一如既往的不理想；

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_json_22

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_数据_23

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_数据_24

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_json_25

当分类数设置为50

精度和分类数在20时的结果相同；实际预测结果也相同；

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_json_26

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_27

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_数据_28

重组

由于结果并不尽如人意，重新查找了资料，代码如下：

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_paddle_29

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_json_30

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_paddle_31

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_32

#导入要用到的模块
import paddle
import paddle.fluid as fluid
import numpy as np
import sys
from multiprocessing import cpu_count
import matplotlib.pyplot as plt
from PIL import Image
import os
import math
from sklearn.metrics import confusion_matrix
import pandas as pd

import os
import json

# Root directory that holds one sub-directory per class
data_root_path = '/home/aistudio/images/face'
# Per-class statistics, appended in label order
class_detail = []
# One class per sub-directory; stray files would make os.listdir() fail below
class_dirs = [d for d in os.listdir(data_root_path)
              if os.path.isdir(os.path.join(data_root_path, d))]
# Label handed to the next class
class_label = 0
# Dataset name = last non-empty component of the root path ('face')
father_path = os.path.basename(data_root_path.rstrip('/'))
# The generated lists go into a directory named after the dataset
data_list_path = '/home/aistudio/%s/' % father_path
os.makedirs(data_list_path, exist_ok=True)

# Entries are collected in memory and written once at the end, instead of
# reopening the list files for every single image.
test_lines = []
trainer_lines = []
# Total number of images over all classes
all_class_images = 0
for class_dir in class_dirs:
    # Statistics of the current class
    class_detail_list = {}
    test_sum = 0
    trainer_sum = 0
    # Number of images seen so far in this class
    class_sum = 0
    path = data_root_path + "/" + class_dir
    img_paths = os.listdir(path)

    for img_path in img_paths:
        name_path = path + '/' + img_path
        entry = name_path + "\t%d" % class_label + "\n"
        if class_sum % 10 == 0:
            # Every 10th image becomes test data
            test_sum += 1
            test_lines.append(entry)
        else:
            trainer_sum += 1
            trainer_lines.append(entry)
        class_sum += 1
        all_class_images += 1

    class_detail_list['class_name'] = class_dir             # class name, e.g. jiangwen
    class_detail_list['class_label'] = class_label          # integer label of the class
    class_detail_list['class_test_images'] = test_sum       # test images of this class
    class_detail_list['class_trainer_images'] = trainer_sum # training images of this class
    class_detail.append(class_detail_list)
    class_label += 1

# Overwrite any previous lists
with open(data_list_path + "test.list", 'w') as f:
    f.writelines(test_lines)
with open(data_list_path + "trainer.list", 'w') as f:
    f.writelines(trainer_lines)

# Number of classes
all_class_sum = len(class_dirs)
# Summary written to readme.json
readjson = {}
readjson['all_class_name'] = father_path                  # dataset (parent directory) name
readjson['all_class_sum'] = all_class_sum
readjson['all_class_images'] = all_class_images
readjson['class_detail'] = class_detail
jsons = json.dumps(readjson, sort_keys=True, indent=4, separators=(',', ': '))
with open(data_list_path + "readme.json",'w') as f:
    f.write(jsons)
print ('生成数据列表完成！')

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_数据_33

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_数据_34

# Mapper for training samples.
# `sample` is a (image path, label) tuple, e.g.
# ('../images/face/zhangziyi/20181206145348.png', 2).
def train_mapper(sample):
    """Load one training image and return (flattened float32 pixels / 255, label)."""
    img_path, label = sample
    # Source images come in different sizes, so they are loaded and then
    # resized/cropped to 100 in training mode.
    pixels = paddle.dataset.image.load_image(img_path)
    pixels = paddle.dataset.image.simple_transform(
        im=pixels,
        resize_size=100,
        crop_size=100,
        is_color=True,
        is_train=True)
    # Flatten and scale the pixel values by 1/255
    scaled = pixels.flatten().astype('float32')/255.0
    return scaled, label
# Build the training reader for the custom dataset
def train_r(train_list, buffered_size=1024):
    """Return a reader that maps every entry of ``train_list`` through train_mapper."""
    def reader():
        # Each line is '<image path>\t<label>', e.g.
        # '../images/face/jiangwen/0b1937e2-f929-11e8-8a8a-005056c00008.jpg\t0'
        with open(train_list, 'r') as f:
            entries = [raw.strip() for raw in f]
        for entry in entries:
            img_path, lab = entry.split('\t')
            yield img_path, int(lab)
    # xmap_readers applies train_mapper using cpu_count() workers
    return paddle.reader.xmap_readers(train_mapper, reader,cpu_count(), buffered_size)

# Mapper for test samples; `sample` is a (image path, label) tuple, e.g.
# ('../images/face/zhangziyi/20181206145348.png', 2).
def test_mapper(sample):
    """Load one test image and return (flattened float32 pixels / 255, label)."""
    img_path, label = sample
    pixels = paddle.dataset.image.load_image(img_path)
    pixels = paddle.dataset.image.simple_transform(
        im=pixels, resize_size=100, crop_size=100, is_color=True, is_train=False)
    scaled = pixels.flatten().astype('float32')/255.0
    return scaled, label

# Build the test (validation) reader for the custom dataset
def test_r(test_list, buffered_size=1024):
    """Return a reader that maps every entry of ``test_list`` through test_mapper."""
    def reader():
        # Each line is '<image path>\t<label>'
        with open(test_list, 'r') as f:
            entries = [raw.strip() for raw in f]
        for entry in entries:
            img_path, lab = entry.split('\t')
            yield img_path, int(lab)

    return paddle.reader.xmap_readers(test_mapper, reader,cpu_count(), buffered_size)

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_json_35

# Samples per batch for both readers
BATCH_SIZE = 128
# Size of the shuffle buffer used for the training reader
BUF_SIZE = 512
# Training reader: shuffled, then grouped into batches
trainer_reader = train_r(train_list="/home/aistudio/face/trainer.list")
train_reader = paddle.batch(
    paddle.reader.shuffle(
        reader=trainer_reader,buf_size=BUF_SIZE),
    batch_size=BATCH_SIZE)

# Test reader: batched in file order, no shuffling
tester_reader = test_r(test_list="/home/aistudio/face/test.list")
test_reader = paddle.batch(
     tester_reader, batch_size=BATCH_SIZE)

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_paddle_36

# Peek at one batch of 3 training samples to inspect what the reader yields
train_data = paddle.batch(trainer_reader,
                            batch_size=3)
sampledata=next(train_data())
print(sampledata)

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_paddle_37

# Activation passed to the layers built in cnn() and vgg(): ReLU
act_function = 'relu'

# Alternative: Sigmoid
#act_function = 'sigmoid'

# Alternative: Tanh
#act_function = 'tanh'

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_json_38

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_数据_39

def cnn(image, type_size):
    """
    Small CNN classifier: three conv+pool stages, one hidden fc layer and a
    softmax output layer of ``type_size`` units, with dropout after every stage.
    """
    features = image
    # Three conv+pool stages with 32, 64 and 64 filters (3x3 kernels, 2x2
    # pooling with stride 2). Each is followed by dropout, which randomly
    # zeroes outputs with the given probability to reduce overfitting.
    for num_filters in (32, 64, 64):
        features = fluid.nets.simple_img_conv_pool(
            input=features,
            filter_size=3,
            num_filters=num_filters,   # also the number of output channels
            pool_size=2,
            pool_stride=2,
            act=act_function)
        features = fluid.layers.dropout(x=features, dropout_prob=0.5)

    # Fully connected hidden layer, again followed by dropout
    hidden = fluid.layers.fc(input=features, size=512, act=act_function)
    hidden = fluid.layers.dropout(x=hidden, dropout_prob=0.5)

    # Output layer: one softmax unit per class
    return fluid.layers.fc(input=hidden, size=type_size, act='softmax')

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_人脸识别_40

def vgg(image, type_size):
    """
    VGG-style classifier: five conv groups with batch norm, then two 512-unit
    fc layers and a softmax output layer of ``type_size`` units.
    """
    def conv_block(ipt, num_filter, groups, dropouts):
        return fluid.nets.img_conv_group(
            input=ipt,                               # input image in [N, C, H, W] format
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,   # filters per convolution
            conv_filter_size=3,                      # kernel size
            conv_act=act_function,
            conv_with_batchnorm=True,                # BatchNorm after each conv
            conv_batchnorm_drop_rate=dropouts,       # dropout rate after each BatchNorm
            pool_type='max')                         # max pooling

    # (filters, number of convolutions, per-convolution dropout rates)
    stages = (
        (64, 2, [0.0, 0]),
        (128, 2, [0.0, 0]),
        (256, 3, [0.0, 0.0, 0]),
        (512, 3, [0.0, 0.0, 0]),
        (512, 3, [0.0, 0.0, 0]),
    )
    features = image
    for num_filter, groups, dropouts in stages:
        features = conv_block(features, num_filter, groups, dropouts)

    dropped = fluid.layers.dropout(x=features, dropout_prob=0.5)
    hidden1 = fluid.layers.fc(input=dropped, size=512, act=None)

    normed = fluid.layers.batch_norm(input=hidden1, act=act_function)
    dropped2 = fluid.layers.dropout(x=normed, dropout_prob=0.0)
    hidden2 = fluid.layers.fc(input=dropped2, size=512, act=None)
    return fluid.layers.fc(input=hidden2, size=type_size, act='softmax')

【定义数据层】

image 和 label 是通过 fluid.layers.data 创建的两个输入数据层。其中 image 是 [3, 100, 100] 维度的浮点数据; label 是 [1] 维度的整数数据。这里需要注意的是: Fluid中默认使用 -1 表示 batch size 维度，默认情况下会在 shape 的第一个维度添加 -1 。所以上段代码中，我们可以接受将一个 [-1, 3, 100, 100] 的numpy array传给 image 。Fluid中用来做类别标签的数据类型是 int64，并且标签从0开始。

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_数据_41

# Input layer for the images: [3, 100, 100] means a 3-channel 100x100 RGB image
image = fluid.layers.data(name='image', shape=[3, 100, 100], dtype='float32')

# Input layer for the labels: one int64 value per sample
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
print('image_shape:',image.shape)
image_shape: (-1, 3, 100, 100)

分类器选择：type_size要和需要分类的类别数量保持一致

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_42

# Build the classifier with either cnn() or vgg(); type_size must equal the
# number of classes in the training data
TYPESIZE = 6

#predict = cnn(image=image, type_size = TYPESIZE)
predict = vgg(image=image, type_size = TYPESIZE)

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_paddle_43

损失函数：这次使用的是交叉熵损失函数，该函数在分类任务上比较常用。定义了一个损失函数之后，还要对它求平均值，因为定义的是一个Batch的损失值。同时我们还可以定义一个准确率函数，这个可以在我们训练的时候输出分类的准确率。

# Per-sample cross-entropy loss between the prediction and the label
cost = fluid.layers.cross_entropy(input=predict, label=label)
# Mean of all elements of cost
avg_cost = fluid.layers.mean(cost)
# Classification accuracy
acc = fluid.layers.accuracy(input=predict, label=label)

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_paddle_44

# Learning rate; the commented-out lines are alternative values
#LEARNING_RATE = 0.1
LEARNING_RATE = 0.025
#LEARNING_RATE = 0.0005
#LEARNING_RATE = 0.0015
#LEARNING_RATE = 0.001

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_json_45

# Adam optimizer
#optimizer = fluid.optimizer.Adam(learning_rate=LEARNING_RATE)
# SGD optimizer (the one in use)
optimizer = fluid.optimizer.SGD(learning_rate=LEARNING_RATE)
# RMSProp optimizer
#optimizer = fluid.optimizer.RMSProp(learning_rate=LEARNING_RATE)
# Adagrad optimizer
#optimizer = fluid.optimizer.AdagradOptimizer(learning_rate=LEARNING_RATE)

# Minimise the mean loss
optimizer.minimize(avg_cost)
print(type(acc))

在上述模型配置完毕后，就得到了两个 fluid.Program：fluid.default_startup_program() 与 fluid.default_main_program()。

参数初始化操作会被写入 fluid.default_startup_program()。fluid.default_main_program() 用于获取默认或全局 main program(主程序)，该主程序用于训练和测试模型。fluid.layers 中的所有layer函数可以向 default_main_program 中添加算子和变量。default_main_program 是fluid的许多编程接口（API）的Program参数的缺省值。例如，当用户没有传入program的时候，Executor.run() 会默认执行 default_main_program 。

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_paddle_46

# Device selection: train on the GPU
# CPU
#place = fluid.CPUPlace()
# GPU
place = fluid.CUDAPlace(0)

# Test-mode copy of the main program, used for evaluation.
# NOTE(review): this clone happens after optimizer.minimize(); Paddle's
# Program.clone documentation appears to recommend cloning before it - confirm.
test_program = fluid.default_main_program().clone(for_test=True)
# Create an executor
exe = fluid.Executor(place)
# Run the startup program to initialise the parameters
exe.run(fluid.default_startup_program())

# DataFeeder converts the data returned by a reader into the structure the
# Executor accepts; the feed list is the image first, then its label
feeder = fluid.DataFeeder(feed_list=[image, label], place=place)

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_52

# Running sample counter and the per-batch history drawn by draw_train_process()
all_train_iter=0
all_train_iters=[]
all_train_costs=[]
all_train_accs=[]

def draw_train_process(title,iters,costs,accs,label_cost,lable_acc):
    """Plot the cost curve (red) and the accuracy curve (green) against ``iters``."""
    plt.title(title, fontsize=24)
    plt.xlabel("iter", fontsize=20)
    plt.ylabel("cost/acc", fontsize=20)
    curves = (
        (costs, 'red', label_cost),
        (accs, 'green', lable_acc),
    )
    for values, colour, legend_text in curves:
        plt.plot(iters, values, color=colour, label=legend_text)
    plt.legend()
    plt.grid()
    plt.show()

【训练并保存模型】

Executor接收传入的program，并根据feed map(输入映射表)和fetch_list(结果获取表) 向program中添加feed operators(数据输入算子)和fetch operators（结果获取算子)。feed map为该program提供输入数据，fetch_list提供program训练结束后用户预期的变量。这次训练25个Pass（即下文代码中的 EPOCH_NUM）。每一个Pass训练结束之后，再使用验证集进行验证，并求出相应的损失值Cost和准确率acc。

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_53

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_人脸识别_54

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_paddle_55

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_人脸识别_56

# Number of training epochs
EPOCH_NUM = 25
print('开始训练...')
model_save_dir = "/home/aistudio/data/model"
# One (epoch, mean test cost, mean test accuracy) tuple per epoch. It must be
# created outside the epoch loop so the "best epoch" search below sees every
# epoch and not just the last one.
lists = []
for pass_id in range(EPOCH_NUM):
    for batch_id, data in enumerate(train_reader()):
        train_cost, train_acc = exe.run(
            program=fluid.default_main_program(),   # run the main (training) program
            feed=feeder.feed(data),                 # feed one batch
            fetch_list=[avg_cost, acc])             # fetch mean cross-entropy cost and accuracy

        # x-axis of the training curves: samples seen, counted in whole batches
        all_train_iter=all_train_iter+BATCH_SIZE
        all_train_iters.append(all_train_iter)
        all_train_costs.append(train_cost[0])
        all_train_accs.append(train_acc[0])

        # Print the training status every 10 batches
        if batch_id % 10 == 0:
            print("\n轮数:%d, 次数:%d, 损失率:%f, 准确率:%f" %
            (pass_id, batch_id, train_cost[0], train_acc[0]))

    # Evaluate on the test set after every epoch
    test_accs = []          # accuracy of each test batch
    test_costs = []         # cost of each test batch
    epoch_predicts = []     # predicted probabilities of each test batch
    epoch_labels = []       # true labels of every test sample
    for batch_id, data in enumerate(test_reader()):
        test_cost, test_acc,data_predict = exe.run(program=test_program,   # run the test program
                                       feed=feeder.feed(data),               # feed one batch
                                       fetch_list=[avg_cost, acc,predict])   # fetch cost, accuracy, predictions
        test_accs.append(test_acc[0])
        test_costs.append(test_cost[0])
        epoch_predicts.append(data_predict)
        # Keep only the labels: stacking the raw (image, label) tuples would
        # build a ragged array, which recent NumPy versions reject.
        epoch_labels.extend(sample[1] for sample in data)

    # Mean over the test batches of this epoch
    test_cost = (sum(test_costs) / len(test_costs))
    test_acc = (sum(test_accs) / len(test_accs))
    lists.append((pass_id,test_cost,test_acc))
    print('第%d轮, 平均损失率:%0.5f, 平均准确率:%0.5f' % (pass_id, test_cost, test_acc))

    # After the loop these hold the results of the final epoch
    all_predict = np.vstack(epoch_predicts)
    all_test_label = np.array(epoch_labels).astype("int64")


# Create the save directory if it does not exist yet
if not os.path.exists(model_save_dir):
    os.makedirs(model_save_dir)
print ('save models to %s' % (model_save_dir))
# Save the inference model: the executor stores all related parameters in the directory
fluid.io.save_inference_model(model_save_dir,   # directory of the inference model
                                  ['image'],    # variables that must be fed at inference time
                                  [predict],    # variables holding the inference result
                                  exe)          # executor that saves the model

# Epoch with the lowest mean test cost (the first one wins on ties)
best = min(lists, key=lambda item: item[1])
print("最好为第 %d 轮数,平均损失率：%0.5f " % (best[0], best[1]))
print("分类准确度：%0.2f%%" % (best[2]*100))

# Plot the training cost and accuracy curves
draw_train_process("training",all_train_iters,all_train_costs,all_train_accs,"trainning cost","trainning acc")

# Confusion matrix for the final epoch. The class count comes from the width
# of the prediction array, so the matrix always has one row and one column
# per class, even for classes that never occur in the test set.
class_ids = list(range(all_predict.shape[1]))
confusion_m = pd.DataFrame(
    confusion_matrix(all_test_label, np.argmax(all_predict, 1), labels=class_ids),
    columns=class_ids)
print(confusion_m)

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_57

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_VGG_58

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_数据_59

人工智能|使用 VGG 网络对人脸识别数据集进行训练并预测，最后对模型进行评估_数据_60

# coding:utf-8
import paddle.fluid as fluid
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import paddle

# Device selection: run on the GPU
place = fluid.CUDAPlace(0)
# CPU
#place = fluid.CPUPlace()

# Create an executor for inference
infer_exe = fluid.Executor(place)
# Separate scope in which the inference network runs
inference_scope = fluid.core.Scope()
# Directory the trained model was saved to
params_dirname ='/home/aistudio/data/model'

# (1) Image preprocessing
def load_image(path):
    """Load and transform the image at ``path``; return float32 pixels scaled by 1/255."""
    # Resize and crop size are both 100, with the training flag off
    transformed = paddle.dataset.image.load_and_transform(path,100,100, False)
    return transformed.astype('float32') / 255.0

infer_imgs = []
# Image to run the prediction on
infer_path='/home/aistudio/work/pengyuyan.png'  


# Load the image and stack it into a batch array
infer_imgs.append(load_image(infer_path))
infer_imgs = np.array(infer_imgs)
print('infer_imgs的维度：',infer_imgs .shape)

# fluid.scope_guard switches the default scope, so every variable created
# while it is active lives in inference_scope
with fluid.scope_guard(inference_scope):
    # Load the saved inference model. The result unpacks into:
    #   inference_program - the program used for prediction
    #   feed_target_names - names of the variables that must be fed
    #   fetch_targets     - variables from which the result is read
    loaded_model = fluid.io.load_inference_model(params_dirname, infer_exe)
    inference_program, feed_target_names, fetch_targets = loaded_model

    # Show the image being classified
    img = Image.open(infer_path)
    plt.imshow(img)
    plt.show()

    # Run the prediction
    results = infer_exe.run(
        inference_program,
        feed={feed_target_names[0]: infer_imgs},
        fetch_list=fetch_targets)
    predicted_index = np.argmax(results[0])
    print('results:',predicted_index)

    print(results)
    print("预测结果为: %s" % class_detail[predicted_index]['class_name'])

经过不断学习，各方资源查找，最终成功识别出人像，经过25轮迭代训练且最高识别准确率为0.960938，平均准确率也有84.85%。