总体简介
本项目主要基于 github 项目,介绍如何接入 TIPC、如何支持 serving;关于原项目的更多信息,可以查看 README.md
进行了解。另外,也可以参考官方的 TIPC 和 Serving 示例,将自己的模型接入 TIPC 并支持 Serving。
TIPC 基础链条
♣ 简介
其实,我们想做的就是用一个 shell 脚本去读取 txt 文件,从而达到 train、eval、动转静和 infer 的目的。我们希望只通过运行一个 shell 脚本,就能让使用者完成上述的一项或多项功能。
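这条链条串起来,大致相当于依次执行下面几类命令。以下只是一个示意(命令中的权重路径等均为假设,实际命令由 shell 脚本按 txt 配置拼接而成,见后文):
# 仅为示意:TIPC 基础链条对应的四个步骤,实际由 test_train_inference_python.sh 按 txt 配置拼接命令执行
import subprocess

cmds = [
    # train
    "python tools/train.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o epoch=2",
    # eval(权重路径为假设)
    "python tools/eval.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=output/model_final.pdparams",
    # 动转静
    "python tools/export_model.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=output/model_final.pdparams",
    # infer
    "python deploy/python/infer.py --device=gpu --model_dir=output_inference/retinanet_r50_fpn_1x_coco --image_dir=demo",
]
for cmd in cmds:
    subprocess.run(cmd, shell=True, check=True)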
♠ 具体流程
既然我们要进行 train、eval、动转静和 infer,首先得写好它们对应的 py 脚本文件,一般分别是 train.py、eval.py、export_model.py 和 infer.py。前两个我们在写算法的时候一般都会写,所以这里主要介绍后面两个脚本文件。
模型的动转静
这里主要是指把模型由动态图转为静态图,供后面的 inference 等使用。具体代码可以参考 ppdet/engine/trainer.py
中第 531 行附近的实现进行修改:
import os
import paddle
from paddle.static import InputSpec


def export(model, model_name, output_dir):
    # 将模型调整为 eval 模式
    model.eval()
    # 设置保存静态图模型的地址
    save_dir = os.path.join(output_dir, model_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    # 设置输入 shape,如果是动态 shape 则设置为 [3, -1, -1]
    image_shape = [3, -1, -1]
    # 设置需要的其他输入
    input_spec = [{
        "image": InputSpec(
            shape=[None] + image_shape, name='image'),
        "im_shape": InputSpec(
            shape=[None, 2], name='im_shape'),
        "scale_factor": InputSpec(
            shape=[None, 2], name='scale_factor')
    }]
    static_model = paddle.jit.to_static(model, input_spec=input_spec)
    # NOTE: dy2st do not pruned program, but jit.save will prune program
    # input spec, prune input spec here and save with pruned input spec
    pruned_input_spec = _prune_input_spec(
        input_spec,
        static_model.forward.main_program,
        static_model.forward.outputs
    )
    # 保存模型
    paddle.jit.save(
        static_model,
        os.path.join(save_dir, 'model'),
        input_spec=pruned_input_spec
    )


def _prune_input_spec(input_spec, program, targets):
    # try to prune static program to figure out pruned input spec
    # so we perform following operations in static mode
    paddle.enable_static()
    pruned_input_spec = [{}]
    program = program.clone()
    program = program._prune(targets=targets)
    global_block = program.global_block()
    for name, spec in input_spec[0].items():
        try:
            v = global_block.var(name)
            pruned_input_spec[0][name] = spec
        except Exception:
            pass
    paddle.disable_static()
    return pruned_input_spec
代码编写完成后,执行 tools/export_model.py
进行测试,静态图模型会被保存至 output_inference/retinanet_r50_fpn_1x_coco
(具体保存位置根据自己编写的程序而定)。
# 安装所需依赖
!pip install -r requirements.txt
!python tools/export_model.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=/home/aistudio/data/data104154/best_model.pdparams
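导出完成后,可以用下面的小片段简单验证静态图模型能否被正常加载。这只是一个示意,路径前缀以实际导出目录为准:
# 验证导出的静态图模型是否可以被加载(示意,路径前缀以实际导出目录为准)
import paddle

loaded = paddle.jit.load('output_inference/retinanet_r50_fpn_1x_coco/model')
loaded.eval()
print(type(loaded))  # 能正常加载并打印 TranslatedLayer,说明导出成功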
模型的 Inference
这里可以使用 argparse 来接收命令行启动时传入的参数。关于这部分以及更多可视化、视频推理和预处理 operator 的编写,可以参考 deploy/python/infer.py
和 deploy/python/preprocess.py 两个文件。
import os
import yaml
import numpy as np
import math
from functools import reduce

import paddle
from paddle.inference import Config
from paddle.inference import create_predictor

# RetinaNet-Based-on-PPdet-main/deploy/python/utils.py
# import 一些参数的设置、时间计算类、内存使用计算类
from utils import argsparser, Timer, get_current_memory_mb
# 预处理函数 preprocess 以及 Resize 等预处理 op 可从上文提到的 deploy/python/preprocess.py 中 import
class Detector(object):
    def __init__(self,
                 pred_config,
                 model_dir,
                 device='CPU',
                 run_mode='fluid',
                 batch_size=1,
                 trt_min_shape=1,
                 trt_max_shape=1280,
                 trt_opt_shape=640,
                 trt_calib_mode=False,
                 cpu_threads=1,
                 enable_mkldnn=False):
        self.pred_config = pred_config
        # 初始化 predictor,主要进行一些 cpu\gpu、mkldnn\TensorRT 等相关设置
        self.predictor, self.config = load_predictor(
            model_dir,
            run_mode=run_mode,
            batch_size=batch_size,
            min_subgraph_size=self.pred_config.min_subgraph_size,
            device=device,
            use_dynamic_shape=self.pred_config.use_dynamic_shape,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
            trt_calib_mode=trt_calib_mode,
            cpu_threads=cpu_threads,
            enable_mkldnn=enable_mkldnn)
        # 这两个成员分别用来记录时间和内存占用
        self.det_times = Timer()
        self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0

    # 预处理函数
    def preprocess(self, image_list):
        # 这里主要是对预处理 operator 进行设置
        # 如果预处理固定,也可以参考以下 link,直接 Compose 在一起就行了
        # https://github.com/littletomatodonkey/AlexNet-Prod/blob/tipc/pipeline/Step5/AlexNet_paddle/deploy/inference_python/infer.py#L48
        preprocess_ops = []
        for op_info in self.pred_config.preprocess_infos:
            new_op_info = op_info.copy()
            op_type = new_op_info.pop('type')
            preprocess_ops.append(eval(op_type)(**new_op_info))
        input_im_lst = []
        input_im_info_lst = []
        for im_path in image_list:
            im, im_info = preprocess(im_path, preprocess_ops)
            input_im_lst.append(im)
            input_im_info_lst.append(im_info)
        # 将预处理后的输入转换为模型需要的格式(根据自己的模型来,也许不需要这个函数)
        inputs = create_inputs(input_im_lst, input_im_info_lst)
        return inputs
    # 后处理
    def postprocess(self,
                    np_boxes,
                    np_masks,
                    inputs,
                    np_boxes_num,
                    threshold=0.5):
        # postprocess output of predictor
        results = {}
        results['boxes'] = np_boxes
        results['boxes_num'] = np_boxes_num
        if np_masks is not None:
            results['masks'] = np_masks
        return results

    def predict(self, image_list, threshold=0.5, warmup=0, repeats=1):
        # 前处理获得模型需要的输入
        self.det_times.preprocess_time_s.start()
        inputs = self.preprocess(image_list)
        self.det_times.preprocess_time_s.end()
        np_boxes, np_masks = None, None
        # 获取输入的 name 并拷贝输入数据
        input_names = self.predictor.get_input_names()
        for i in range(len(input_names)):
            input_tensor = self.predictor.get_input_handle(input_names[i])
            input_tensor.copy_from_cpu(inputs[input_names[i]])
        # warmup 阶段,不计入推理时间
        for i in range(warmup):
            self.predictor.run()
            output_names = self.predictor.get_output_names()
            boxes_tensor = self.predictor.get_output_handle(output_names[0])
            np_boxes = boxes_tensor.copy_to_cpu()
            if self.pred_config.mask:
                masks_tensor = self.predictor.get_output_handle(output_names[2])
                np_masks = masks_tensor.copy_to_cpu()
        # 正式推理并统计时间
        self.det_times.inference_time_s.start()
        for i in range(repeats):
            self.predictor.run()
            output_names = self.predictor.get_output_names()
            boxes_tensor = self.predictor.get_output_handle(output_names[0])
            np_boxes = boxes_tensor.copy_to_cpu()
            boxes_num = self.predictor.get_output_handle(output_names[1])
            np_boxes_num = boxes_num.copy_to_cpu()
            if self.pred_config.mask:
                masks_tensor = self.predictor.get_output_handle(output_names[2])
                np_masks = masks_tensor.copy_to_cpu()
        self.det_times.inference_time_s.end(repeats=repeats)
        self.det_times.postprocess_time_s.start()
        results = []
        # 经过后处理返回结果
        if reduce(lambda x, y: x * y, np_boxes.shape) < 6:
            print('[WARNING] No object detected.')
            results = {'boxes': np.array([[]]), 'boxes_num': [0]}
        else:
            results = self.postprocess(
                np_boxes, np_masks, inputs, np_boxes_num, threshold=threshold)
        self.det_times.postprocess_time_s.end()
        self.det_times.img_num += len(image_list)
        return results

    def get_timer(self):
        return self.det_times
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   device='CPU',
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640,
                   trt_calib_mode=False,
                   cpu_threads=1,
                   enable_mkldnn=False):
    if device != 'GPU' and run_mode != 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
            .format(run_mode, device))
    # 利用 inference 的 api 将模型加载进来
    config = Config(
        os.path.join(model_dir, 'model.pdmodel'),
        os.path.join(model_dir, 'model.pdiparams')
    )
    if device == 'GPU':
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)
    elif device == 'XPU':
        config.enable_xpu(10 * 1024 * 1024)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception as e:
                print(
                    "The current environment does not support `mkldnn`, so disable mkldnn."
                )
                pass
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(
            workspace_size=1 << 10,
            max_batch_size=batch_size,
            min_subgraph_size=min_subgraph_size,
            precision_mode=precision_map[run_mode],
            use_static=False,
            use_calib_mode=trt_calib_mode)
        if use_dynamic_shape:
            min_input_shape = {
                'image': [batch_size, 3, trt_min_shape, trt_min_shape]
            }
            max_input_shape = {
                'image': [batch_size, 3, trt_max_shape, trt_max_shape]
            }
            opt_input_shape = {
                'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]
            }
            config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')
    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, config
'''
主要分为 bs 是否等于 1 两种情况处理,bs 大于 1 时考虑到可能要 padding。
另外把需要的字段及相关信息放到一个字典里,然后返回。
'''
def create_inputs(imgs, im_info):
    inputs = {}
    im_shape = []
    scale_factor = []
    if len(imgs) == 1:
        inputs['image'] = np.array((imgs[0], )).astype('float32')
        inputs['im_shape'] = np.array(
            (im_info[0]['im_shape'], )).astype('float32')
        inputs['scale_factor'] = np.array(
            (im_info[0]['scale_factor'], )).astype('float32')
        return inputs

    for e in im_info:
        im_shape.append(np.array((e['im_shape'], )).astype('float32'))
        scale_factor.append(np.array((e['scale_factor'], )).astype('float32'))
    inputs['im_shape'] = np.concatenate(im_shape, axis=0)
    inputs['scale_factor'] = np.concatenate(scale_factor, axis=0)
    # bs 大于 1 时,把一个 batch 内的图片 padding 到相同大小再 stack
    imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs]
    max_shape_h = max([e[0] for e in imgs_shape])
    max_shape_w = max([e[1] for e in imgs_shape])
    padding_imgs = []
    for img in imgs:
        im_c, im_h, im_w = img.shape[:]
        padding_im = np.zeros(
            (im_c, max_shape_h, max_shape_w), dtype=np.float32)
        padding_im[:, :im_h, :im_w] = img
        padding_imgs.append(padding_im)
    inputs['image'] = np.stack(padding_imgs, axis=0)
    return inputs
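把上面的 Detector、load_predictor 和 create_inputs 串起来,一个简化的 main 流程大致如下。注意这只是一个示意:其中的 PredictConfig(解析导出目录下 infer_cfg.yml 的类)以及 --image_file、--threshold 等参数,均假设在 deploy/python 的相应文件中已有实现,具体以 deploy/python/infer.py 为准:
# 简化的 main 流程示意(PredictConfig、FLAGS 的各字段均为假设,完整实现见 deploy/python/infer.py)
if __name__ == '__main__':
    parser = argsparser()
    FLAGS = parser.parse_args()
    # 解析导出目录下的 infer_cfg.yml,得到预处理、label 等配置
    pred_config = PredictConfig(FLAGS.model_dir)
    detector = Detector(
        pred_config,
        FLAGS.model_dir,
        device=FLAGS.device,
        run_mode=FLAGS.run_mode,
        batch_size=FLAGS.batch_size,
        cpu_threads=FLAGS.cpu_threads,
        enable_mkldnn=FLAGS.enable_mkldnn)
    # 对单张图片进行预测,results['boxes'] 每行为 [class_id, score, x1, y1, x2, y2]
    results = detector.predict([FLAGS.image_file], threshold=FLAGS.threshold)
    print(results['boxes'][:5])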
可以用编写好的 inference 代码加载刚才导出的静态图模型,对图片进行测试:
!python ./deploy/python/infer.py --device=gpu --model_dir=output_inference/retinanet_r50_fpn_1x_coco --batch_size=1 --image_dir=demo
结果的可视化会被保存至 ./output。
关于 txt 命令配置文件和 shell 脚本的编写
- txt 命令配置文件
shell 脚本是按行号读取 txt 文件中的信息的,两者逐行对应。比如,shell 脚本约定从 txt 文件的第 4 行(从 0 开始计数)读取是否使用 gpu,此时你如果把 use_gpu:True
写到了第 6 行,那么 shell 这边读到的就是错误的信息。因此,不要随意添加或者删除行;如果必须添加或删除行,则需要同时修改对应的 shell 脚本。
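这种「按行取值、按冒号切分」的约定可以用下面的 Python 小片段来示意。它仅用于说明行号对应关系,并非 TIPC 源码,实际解析由 shell 脚本中的 func_parser_key/func_parser_value 完成:
# 示意:按行号读取 txt 配置,并按冒号切分出 key/value
def parse_line(txt_path, line_idx):
    with open(txt_path) as f:
        lines = f.read().splitlines()
    key, _, value = lines[line_idx].partition(':')
    return key, value

# 第 4 行(从 0 开始计数)应当读到 use_gpu
key, value = parse_line(
    'tipc/train_infer_python/configs/retinanet/retinanet_r50_fpn_1x_coco.txt', 4)
print(key, value)  # 期望得到 use_gpu True(txt 中的行内注释删掉之后)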
以下注释在正式使用的时候可以删除(注意占行的双 # 号不要删除)。
===========================train_params===========================
model_name:retinanet_r50_fpn_1x_coco # 模型名称一般用作储存输出的文件夹名字
python:python3.7
gpu_list:0 # 一般设置为 0,暂不支持多 gpu
use_gpu:True # 是否使用 gpu,如果除了 gpu 还要测试 cpu,则写为 True|False
auto_cast:null|amp # 是否测试半精度
epoch:lite_train_lite_infer=2|lite_train_whole_infer=1|whole_train_whole_infer=12
save_dir:tipc/train_infer_python/output/retinanet_r50 # output 保存的地址
TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_train_whole_infer=1
pretrain_weights:/home/aistudio/data/data104154/best_model.pdparams # 预训练权重位置,也可以放最优权重
train_model_name:model_final.pdparams # train 保存的权重名称
train_infer_img_dir:./dataset/coco/test2017/ # 数据集位置
filename:retinanet_r50_fpn_1x_coco
##
trainer:norm_train # 这里只对 norm_train 模式验证就好了
norm_train:tools/train.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o # 这里是运行 train 的命令行 一般需要修改的是 -c 后面的配置文件位置
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o # 这里是运行 eval 的命令行
null:null
##
===========================infer_params===========================
--output_dir:./output_infer/python/retinanet_r50 # inference 的输出位置
weights:/home/aistudio/data/data104154/best_model.pdparams # 进行 inference 载入的最优权重
norm_export:tools/export_model.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o # 静态图模型导出的命令行
pact_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
kl_quant_export:null
##
infer_mode:norm
infer_quant:False
inference:./deploy/python/infer.py # infer 脚本的位置
--device:gpu|cpu # 对 gpu、cpu 都进行测试
--enable_mkldnn:True|False # cpu 测试时是否使用 mkldnn
--cpu_threads:1|4 # 测试不同线程
--batch_size:1
--use_tensorrt:null # gpu 测试时是否使用 tensorrt
--run_mode:fluid # 这里可以对 tensorrt 的精度进行设置,本次不要求
--model_dir:tipc/train_infer_python/output/retinanet_r50/norm_train_gpus_0_autocast_null/retinanet_r50_fpn_1x_coco # 进行 inference 载入的权重
--image_dir:./dataset/coco/test2017/ # inference 需要的图片
--save_log_path:null
--run_benchmark:False
--trt_max_shape:1600
对于 inference 中运行 infer.py 所配置的其他参数,如 device、enable_mkldnn 等,会传入前面代码中的 load_predictor
函数,用于对 predictor 进行初始化。
- 关于 shell 脚本文件,其实没有太多需要修改的地方,主要是根据自己编写的 txt 文件和 py 脚本所需要的参数进行调整。
- 可能需要对自己在 txt 中增加的行进行解析,参照下述前两行;并且更新其后各参数解析所用的行号(因为增加了一行,在它之后的参数解析 $(func_parser_key "${lines[i]}") 中的 i 就应该加 1)。
- 根据自己的 py 脚本需要的参数,增删 args 或 kwargs,如下述命令中的 "${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_filename} ${set_autocast}"。
your_params_key=$(func_parser_key "${lines[4]}")
your_params_value=$(func_parser_value "${lines[4]}")
cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_filename} ${set_autocast}"
模型的轻量化验证
其实如果走通了原模型的 TIPC 验证,那么轻量化模型的 TIPC 验证就很容易了。这里以 mobilenetv1 为例,首先将原模型的 backbone 替换为 mobilenetv1。需要确定的是自己要返回哪几层的特征,这可以通过阅读源码 RetinaNet-Based-on-PPdet-main/ppdet/modeling/backbones/mobilenet_v1.py
了解网络结构得到:这里需要返回的是第 4、6、13 层的特征,这几层的 stride 刚好是 8、16 和 32。预训练权重也得使用 mobilenetv1 的,优化器方面可能也需要修改,其余的都可以沿用原来的配置:
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/retinanet_r50_fpn.yml',
  '_base_/optimizer_1x.yml',
  '_base_/retinanet_reader.yml',
]

pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_pretrained.pdparams

RetinaNet:
  backbone: MobileNet

MobileNet:
  scale: 1
  feature_maps: [4, 6, 13]
  with_extra_blocks: false
  extra_block_filters: []
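如果想确认 feature_maps: [4, 6, 13] 对应的下采样倍率确实是 8、16、32,可以用类似下面的小片段做个简单检查。这只是一个示意,MobileNet 的构造参数和 forward 的输入格式以 ppdet/modeling/backbones/mobilenet_v1.py 的实际实现为准:
# 示意:检查 MobileNet backbone 在 feature_maps=[4, 6, 13] 处输出特征图的空间尺寸
import paddle
from ppdet.modeling.backbones.mobilenet_v1 import MobileNet

backbone = MobileNet(scale=1, feature_maps=[4, 6, 13],
                     with_extra_blocks=False, extra_block_filters=[])
x = paddle.randn([1, 3, 640, 640])
feats = backbone({'image': x})
for feat in feats:
    # 期望空间尺寸分别为 80、40、20,对应 stride 8、16、32
    print(feat.shape)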
然后训练获得最优权重后,将原来的 txt 文件的相关部分修改为 mobilenetv1 的内容(比如最优权重位置换为 mobilenetv1 的),这里两个对比的文件为:
tipc/train_infer_python/configs/retinanet/retinanet_mobilenet_v1_fpn_1x_coco.txt
tipc/train_infer_python/configs/retinanet/retinanet_r50_fpn_1x_coco.txt
♦ 效果体验与展示
更多详细的介绍可以参照文件 tipc/train_infer_python/README.md
- 安装各种依赖和准备数据集
!pip install -r requirements.txt
!bash tipc/train_infer_python/prepare.sh tipc/train_infer_python/configs/retinanet/retinanet_r50_fpn_1x_coco.txt 'lite_train_lite_infer'
- 对 retinanet_r50_fpn_1x_coco 使用少量数据进行训练和推理
!bash tipc/train_infer_python/test_train_inference_python.sh tipc/train_infer_python/configs/retinanet/retinanet_r50_fpn_1x_coco.txt 'lite_train_lite_infer'
如果运行成功,则会输出如下信息:
Run successfully with command - python3.7 tools/train.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o use_gpu=True save_dir=tipc/train_infer_python/output/retinanet_r50/norm_train_gpus_0_autocast_null epoch=2 pretrain_weights=/home/aistudio/data/data104154/best_model.pdparams TrainReader.batch_size=2 filename=retinanet_r50_fpn_1x_coco !
Run successfully with command - python3.7 tools/eval.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=tipc/train_infer_python/output/retinanet_r50/norm_train_gpus_0_autocast_null/retinanet_r50_fpn_1x_coco/model_final.pdparams use_gpu=True !
Run successfully with command - python3.7 tools/export_model.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=tipc/train_infer_python/output/retinanet_r50/norm_train_gpus_0_autocast_null/retinanet_r50_fpn_1x_coco/model_final.pdparams filename=retinanet_r50_fpn_1x_coco --output_dir=./output_infer/python/retinanet_r50 !
Run successfully with command - python3.7 ./deploy/python/infer.py --device=gpu --run_mode=fluid --model_dir=./output_infer/python/retinanet_r50/retinanet_r50_fpn_1x_coco --batch_size=1 --image_dir=./dataset/coco/test2017/ --run_benchmark=False --trt_max_shape=1600 --output_dir=./output_infer/python/retinanet_r50 > tipc/train_infer_python/output/retinanet_r50/python_infer_gpu_precision_fluid_batchsize_1.log 2>&1 !
- 对 retinanet_mobilenet_v1_fpn_1x_coco 使用少量数据进行训练和推理
!bash tipc/train_infer_python/test_train_inference_python.sh tipc/train_infer_python/configs/retinanet/retinanet_mobilenet_v1_fpn_1x_coco.txt 'lite_train_lite_infer'
如果运行成功也会输出上述信息,输出和 log 文件保存至 tipc/train_infer_python/output。
Serving 支持
♣ 简介
serving 部署需要做的事就是在服务端加载好模型,然后通过客户端访问,并且进行推理。这里加载的模型是由静态图模型转换而来的,所以需要像之前一样进行静态图模型的导出。
♠ 具体流程
如果我们已经做过 TIPC 了,那么就已经有该模型的静态图模型了。把静态图模型转换为需要的 serving 模型后,主要工作是编写服务端、客户端以及一些前后处理,最后再把它接入 TIPC,也就是像之前一样写一个 shell 脚本和对应的 txt 文件。
环境的准备
首先,安装一些 serving 需要的包,检查一下自己的环境,并选择对应的版本安装:
!nvidia-smi
Tue Jan 25 09:56:06 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67 Driver Version: 418.67 CUDA Version: 10.1 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 Tesla V100-SXM2... On | 00000000:05:00.0 Off | 0 |
| N/A 34C P0 40W / 300W | 0MiB / 32480MiB | 0% Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
# 下载包
!wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl
!wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.7.0-cp37-none-any.whl
!wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-0.7.0-py3-none-any.whl
# 安装包
!pip install paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl
!pip install paddle_serving_client-0.7.0-cp37-none-any.whl
!pip install paddle_serving_app-0.7.0-py3-none-any.whl
使用下述命令就可以将静态图模型转换为 serving 需要的模型:
'''
--dirname          静态图模型保存目录
--model_filename   静态图模型文件名
--params_filename  静态图模型参数文件名
--serving_server   服务端文件保存目录,这里为 deploy/serving/serving_server
--serving_client   客户端文件保存目录,这里为 deploy/serving/serving_client
'''
!python3 -m paddle_serving_client.convert --dirname output_inference/retinanet_r50_fpn_1x_coco --model_filename model.pdmodel --params_filename model.pdiparams --serving_server deploy/serving/serving_server --serving_client deploy/serving/serving_client
输出模型可以在目录 deploy/serving
找到。
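转换完成后,可以查看 serving_server 目录下生成的 serving_server_conf.prototxt,确认模型的 feed/fetch 变量名,后面服务端 preprocess/postprocess 的字段名需要与之对应:
# 示意:打印转换得到的配置文件,查看 feed_var / fetch_var 的名称与 shape
with open('deploy/serving/serving_server/serving_server_conf.prototxt') as f:
    print(f.read())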
服务端及其配置文件
服务端主要需要编写输入预处理和结果后处理,可以参考以下代码:
class RetinaNetOp(Op):
    def init_op(self):
        # 这里 compose 一些需要的预处理操作
        self.eval_transforms = Compose([
            Resize(target_size=[800, 1333]),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            Permute(),
            PadStride(32)
        ])

    def preprocess(self, input_dicts, data_id, log_id):
        # 这里其实主要针对 bs 为 1 的情况
        (_, input_dict), = input_dicts.items()
        batch_size = len(input_dict.keys())
        imgs = []
        imgs_info = {'im_shape': [], 'scale_factor': []}
        for key in input_dict.keys():
            # 对传入进来的数据进行解码
            data = base64.b64decode(input_dict[key].encode('utf8'))
            img = cv2.imdecode(np.frombuffer(data, np.uint8), cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # 添加模型需要的字段
            im_info = {
                'scale_factor': np.array([1., 1.], dtype=np.float32),
                'im_shape': img.shape[:2],
            }
            # 进行数据增强和预处理
            img, im_info = self.eval_transforms(img, im_info)
            imgs.append(img[np.newaxis, :].copy())
            imgs_info["im_shape"].append(im_info["im_shape"][np.newaxis, :].copy())
            imgs_info["scale_factor"].append(im_info["scale_factor"][np.newaxis, :].copy())
        input_imgs = np.concatenate(imgs, axis=0)
        input_im_shape = np.concatenate(imgs_info["im_shape"], axis=0)
        input_scale_factor = np.concatenate(imgs_info["scale_factor"], axis=0)
        # 最后的 return 只需要管第一个,它的内容就是你的模型推理需要的输入
        return {"image": input_imgs, "im_shape": input_im_shape, "scale_factor": input_scale_factor}, False, None, ""

    def postprocess(self, input_dicts, fetch_dict, data_id, log_id):
        # 输出字段可以查看 deploy/serving/serving_server/serving_server_conf.prototxt(由静态图模型转换得到)
        np_boxes = list(fetch_dict.values())[0]
        # 这里是已经经过 nms 之后的输出,按照这个进行后处理就好了
        keep = (np_boxes[:, 1] > 0.5) & (np_boxes[:, 0] > -1)
        np_boxes = np_boxes[keep, :]
        result = {"class_id": [], "confidence": [], "left_top": [], "right_bottom": []}
        for dt in np_boxes:
            clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
            xmin, ymin, xmax, ymax = bbox
            result["class_id"].append(clsid)
            result["confidence"].append(score)
            result["left_top"].append([xmin, ymin])
            result["right_bottom"].append([xmax, ymax])
        result["class_id"] = str(result["class_id"])
        result["confidence"] = str(result["confidence"])
        result["left_top"] = str(result["left_top"])
        result["right_bottom"] = str(result["right_bottom"])
        return result, None, ""


# 其余的都是固定的操作
class RetinaNetService(WebService):
    def get_pipeline_response(self, read_op):
        retinanet_op = RetinaNetOp(name="retinanet", input_ops=[read_op])
        return retinanet_op


# define the service class
uci_service = RetinaNetService(name="retinanet")
# load config and prepare the service
uci_service.prepare_pipeline_config("config.yml")
# start the service
uci_service.run_service()
关于服务端配置文件,需要编写的内容不多:
op:
    # op 名称,与 web_service 中的 Service 类初始化 name 参数一致
    retinanet:
        # 并发数,is_thread_op=True 时为线程并发;否则为进程并发
        concurrency: 1
        # 当 op 配置没有 server_endpoints 时,从 local_service_conf 读取本地服务配置
        local_service_conf:
            # serving 模型导出的位置
            model_config: "./serving_server"
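可以用下面的小片段读取 config.yml,确认 op 名称和 model_config 等配置是否正确。字段结构以实际的 deploy/serving/config.yml 为准:
# 示意:读取并检查 serving 的配置文件
import yaml

with open('deploy/serving/config.yml') as f:
    conf = yaml.safe_load(f)
# op 名称需要与 RetinaNetOp 初始化时的 name 一致
print(conf['op']['retinanet']['local_service_conf']['model_config'])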
具体详情可以参考:
deploy/serving/web_service.py
deploy/serving/config.yml
关于预处理算子的编写,可以参考 deploy/serving/preprocess_ops.py,根据自己的需要进行添加。
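上面 init_op 中用到的 Compose 大致做的事情如下。这里只是一个最小示意,假设每个预处理 op 的调用接口都是 (img, im_info) -> (img, im_info),具体实现以 deploy/serving/preprocess_ops.py 为准:
# 最小示意:把若干预处理 op 按顺序串起来
class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, im_info):
        # 依次调用每个预处理 op,不断更新 img 和 im_info
        for t in self.transforms:
            img, im_info = t(img, im_info)
        return img, im_info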
这里需要去终端运行 python3 deploy/serving/web_service.py &。注意,如果之前已经运行过,需要先把旧的服务进程 kill 掉;另外此时代码和配置中的所有路径需要改为绝对路径,而后面接入 tipc 时使用的是相对路径。
客户端的编写
客户端就更简单了,主要是指定要访问哪个模型服务、测试哪张图片,具体如下:
import base64
import json

import requests


def get_args(add_help=True):
    import argparse
    parser = argparse.ArgumentParser(
        description='Paddle Serving', add_help=add_help)
    # 需要测试的图片
    parser.add_argument('--img_path', default="dataset/coco/test2017/000000575930.jpg")
    args = parser.parse_args()
    return args


# 对输入进行编码
def cv2_to_base64(image):
    return base64.b64encode(image).decode('utf8')


def main(args):
    # 访问的 url,注意改为你的模型名称
    url = "http://127.0.0.1:18080/retinanet/prediction"
    logid = 10000
    img_path = args.img_path
    with open(img_path, 'rb') as file:
        image_data1 = file.read()
    # data should be transformed to the base64 format
    image = cv2_to_base64(image_data1)
    data = {"key": ["image"], "value": [image], "logid": logid}
    # send requests
    r = requests.post(url=url, data=json.dumps(data))
    print(r.json())


if __name__ == "__main__":
    args = get_args()
    main(args)
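服务端返回的 value 是字符串形式的列表(见后文的输出示例),如果想在客户端继续做可视化等处理,可以像下面这样把它解析回 Python 对象。这只是一个示意,字段名以实际返回结果为准:
# 示意:把 pipeline 返回的字符串形式的 value 解析回 Python 对象,便于后续可视化等处理
import ast

def parse_pipeline_result(ret):
    # ret 即上面 main() 中 r.json() 的返回值
    return {k: ast.literal_eval(v) for k, v in zip(ret['key'], ret['value'])}

# 用法示意:
# result = parse_pipeline_result(r.json())
# print(result['class_id'], result['confidence'])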
同样可以对编写好的客户端进行测试:按照上述方法启动服务端后,使用 ctrl c 回到命令行,然后在终端输入命令运行客户端 python3 deploy/serving/pipeline_http_client.py。
接入 TIPC
serving 接入 TIPC 没有之前那么复杂,需要改的东西很少,需要注意的点和之前也一样,主要是 txt 文件的编写:
===========================serving_params===========================
model_name:RetinaNet
python:python3.7
trans_model:-m paddle_serving_client.convert
--dirname:output_infer/python/retinanet_r50/retinanet_r50_fpn_1x_coco # 静态图模型的位置,用于导出 serving model
--model_filename:model.pdmodel
--params_filename:model.pdiparams
--serving_server:deploy/serving/serving_server # server 端模型导出的位置
--serving_client:deploy/serving/serving_client # client 端模型导出的位置
serving_dir:./deploy/serving
web_service:web_service.py
op.alexnet.local_service_conf.devices:0
null:null
null:null
null:null
null:null
pipline:pipeline_http_client.py
--img_path:../../dataset/coco/test2017/000000575930.jpg # 需要测试的图片
♦ 效果体验与展示
更多详细的介绍以及环境配置,可以参照文件 tipc/serving/README.md
,首先安装依赖:
!pip install -r requirements.txt
进行测试
!bash tipc/serving/test_serving.sh tipc/serving/configs/retinanet_r50_fpn_1x_coco.txt
################### run test ###################
/home/aistudio/deploy/serving
2022/01/25 11:23:14 start proxy service
W0125 11:23:18.311997 6867 analysis_predictor.cc:795] The one-time configuration of analysis predictor failed, which may be due to native predictor called first and its configurations taken effect.
I0125 11:23:18.441354 6867 analysis_predictor.cc:665] ir_optim is turned off, no IR pass will be executed
--- Running analysis [ir_graph_build_pass]
--- Running analysis [ir_graph_clean_pass]
--- Running analysis [ir_analysis_pass]
--- Running analysis [ir_params_sync_among_devices_pass]
I0125 11:23:18.746263 6867 ir_params_sync_among_devices_pass.cc:45] Sync params from CPU to GPU
--- Running analysis [adjust_cudnn_workspace_size_pass]
--- Running analysis [inference_op_replace_pass]
--- Running analysis [memory_optimize_pass]
I0125 11:23:18.907073 6867 memory_optimize_pass.cc:216] Cluster name : reshape2_35.tmp_1 size: 0
I0125 11:23:18.907119 6867 memory_optimize_pass.cc:216] Cluster name : fill_constant_43.tmp_0 size: 4
I0125 11:23:18.907122 6867 memory_optimize_pass.cc:216] Cluster name : fill_constant_41.tmp_0 size: 4
I0125 11:23:18.907131 6867 memory_optimize_pass.cc:216] Cluster name : im_shape size: 8
I0125 11:23:18.907136 6867 memory_optimize_pass.cc:216] Cluster name : scale_factor size: 8
I0125 11:23:18.907138 6867 memory_optimize_pass.cc:216] Cluster name : image size: 12
I0125 11:23:18.907143 6867 memory_optimize_pass.cc:216] Cluster name : conv2d_181.tmp_1 size: 144
I0125 11:23:18.907147 6867 memory_optimize_pass.cc:216] Cluster name : batch_norm_52.tmp_3 size: 8192
I0125 11:23:18.907155 6867 memory_optimize_pass.cc:216] Cluster name : relu_39.tmp_0 size: 4096
I0125 11:23:18.907160 6867 memory_optimize_pass.cc:216] Cluster name : conv2d_123.tmp_0 size: 8192
I0125 11:23:18.907163 6867 memory_optimize_pass.cc:216] Cluster name : batch_norm_49.tmp_1 size: 8192
I0125 11:23:18.907166 6867 memory_optimize_pass.cc:216] Cluster name : conv2d_161.tmp_1 size: 144
I0125 11:23:18.907169 6867 memory_optimize_pass.cc:216] Cluster name : conv2d_171.tmp_1 size: 144
I0125 11:23:18.907172 6867 memory_optimize_pass.cc:216] Cluster name : relu_45.tmp_0 size: 8192
I0125 11:23:18.907176 6867 memory_optimize_pass.cc:216] Cluster name : elementwise_add_15 size: 8192
I0125 11:23:18.907179 6867 memory_optimize_pass.cc:216] Cluster name : reshape2_28.tmp_0 size: 320
I0125 11:23:18.907183 6867 memory_optimize_pass.cc:216] Cluster name : conv2d_141.tmp_1 size: 144
I0125 11:23:18.907186 6867 memory_optimize_pass.cc:216] Cluster name : relu_21.tmp_0 size: 2048
I0125 11:23:18.907189 6867 memory_optimize_pass.cc:216] Cluster name : relu_88.tmp_0 size: 1024
I0125 11:23:18.907197 6867 memory_optimize_pass.cc:216] Cluster name : conv2d_151.tmp_1 size: 144
--- Running analysis [ir_graph_to_program_pass]
I0125 11:23:19.583788 6867 analysis_predictor.cc:714] ======= optimize end =======
I0125 11:23:19.620709 6867 naive_executor.cc:98] --- skip [feed], feed -> scale_factor
I0125 11:23:19.620766 6867 naive_executor.cc:98] --- skip [feed], feed -> image
I0125 11:23:19.620771 6867 naive_executor.cc:98] --- skip [feed], feed -> im_shape
I0125 11:23:19.632345 6867 naive_executor.cc:98] --- skip [_generated_var_22], fetch -> fetch
I0125 11:23:19.632387 6867 naive_executor.cc:98] --- skip [_generated_var_23], fetch -> fetch
W0125 11:23:19.708725 6867 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 10.1, Runtime API Version: 10.1
W0125 11:23:19.712651 6867 device_context.cc:465] device: 0, cuDNN Version: 7.6.
{'err_no': 0, 'err_msg': '', 'key': ['class_id', 'confidence', 'left_top', 'right_bottom'], 'value': ['[0, 27, 39, 39, 39, 39, 48, 48]', '[0.9298271, 0.78884697, 0.609955, 0.56487834, 0.56370527, 0.5328276, 0.6830632, 0.67401433]', '[[288.6603, 9.321735], [412.90067, 172.55153], [539.206, 3.6034787], [557.5477, 4.5205536], [521.5307, 4.789155], [571.5571, 0.0], [15.654112, 242.51068], [202.25995, 197.21396]]', '[[638.92633, 390.8219], [477.95944, 296.9499], [559.34314, 64.882324], [572.8147, 40.891556], [538.74994, 67.22812], [583.3348, 42.266556], [213.04216, 322.14337], [368.59772, 320.33978]]'], 'tensors': []}
Run successfully with command - python3.7 pipeline_http_client.py --img_path=../../dataset/coco/test2017/000000575930.jpg> ../../tipc/serving/output/server_infer_gpu_pipeline_http_usetrt_null_precision_null_batchsize_1.log 2>&1!
具体输出保存在 tipc/serving/output
,可以将 serving 的输出:
{'err_no': 0, 'err_msg': '', 'key': ['class_id', 'confidence', 'left_top', 'right_bottom'], 'value': ['[0, 27, 39, 39, 39, 39, 48, 48]', '[0.9298271, 0.78884697, 0.609955, 0.56487834, 0.56370527, 0.5328276, 0.6830632, 0.67401433]', '[[288.6603, 9.321735], [412.90067, 172.55153], [539.206, 3.6034787], [557.5477, 4.5205536], [521.5307, 4.789155], [571.5571, 0.0], [15.654112, 242.51068], [202.25995, 197.21396]]', '[[638.92633, 390.8219], [477.95944, 296.9499], [559.34314, 64.882324], [572.8147, 40.891556], [538.74994, 67.22812], [583.3348, 42.266556], [213.04216, 322.14337], [368.59772, 320.33978]]'], 'tensors': []}
和 inference 输出进行对比
class_id:0, confidence:0.9298, left_top:[288.66,9.32],right_bottom:[638.93,390.82]
class_id:27, confidence:0.7888, left_top:[412.90,172.55],right_bottom:[477.96,296.95]
class_id:39, confidence:0.6100, left_top:[539.21,3.60],right_bottom:[559.34,64.88]
class_id:39, confidence:0.5649, left_top:[557.55,4.52],right_bottom:[572.81,40.89]
class_id:39, confidence:0.5637, left_top:[521.53,4.79],right_bottom:[538.75,67.23]
class_id:39, confidence:0.5328, left_top:[571.56,0.00],right_bottom:[583.33,42.27]
class_id:48, confidence:0.6831, left_top:[15.65,242.51],right_bottom:[213.04,322.14]
class_id:48, confidence:0.6740, left_top:[202.26,197.21],right_bottom:[368.60,320.34]
完全一致🚀🚀🚀,至此完结 🌸🌸🌸