总体简介
本项目主要基于 github 项目,介绍如何接入 TIPC、如何支持 serving;关于原项目的更多信息,可以查看 README.md
进行了解。另外,也可以参考官方的 TIPC 和 Serving 示例,将自己的模型接入 TIPC 并支持 Serving。
TIPC 基础链条
♣ 简介
其实,我们想做的就是用一个 shell 脚本去读取 txt 文件,从而达到 train、eval、动转静和 infer 的目的。我们希望只通过运行一个 shell 脚本,就能让使用者完成上述的一项或多项功能。
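这条链条串起来,大致相当于依次执行下面几类命令。以下只是一个示意(命令中的权重路径等均为假设,实际命令由 shell 脚本按 txt 配置拼接而成,见后文):
# 仅为示意:TIPC 基础链条对应的四个步骤,实际由 test_train_inference_python.sh 按 txt 配置拼接命令执行
import subprocess

cmds = [
    # train
    "python tools/train.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o epoch=2",
    # eval(权重路径为假设)
    "python tools/eval.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=output/model_final.pdparams",
    # 动转静
    "python tools/export_model.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=output/model_final.pdparams",
    # infer
    "python deploy/python/infer.py --device=gpu --model_dir=output_inference/retinanet_r50_fpn_1x_coco --image_dir=demo",
]
for cmd in cmds:
    subprocess.run(cmd, shell=True, check=True)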
♠ 具体流程
既然我们要进行 train、eval、动转静和 infer,首先得写好它们对应的 py 脚本文件,一般分别是 train.py、eval.py、export_model.py 和 infer.py。前两个我们在写算法的时候一般都会写,所以这里主要介绍后面两个脚本文件。
模型的动转静
这里主要是指把模型由动态图转为静态图,供后面的 inference 等使用。具体代码可以参考 ppdet/engine/trainer.py
中第 531 行附近的实现进行修改:
import os
import paddle
from paddle.static import InputSpec


def export(model, model_name, output_dir):
    # 将模型调整为 eval 模式
    model.eval()
    # 设置保存静态图模型的地址
    save_dir = os.path.join(output_dir, model_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    # 设置输入 shape,如果是动态 shape 则设置为 [3, -1, -1]
    image_shape = [3, -1, -1]
    # 设置需要的其他输入
    input_spec = [{
        "image": InputSpec(
            shape=[None] + image_shape, name='image'),
        "im_shape": InputSpec(
            shape=[None, 2], name='im_shape'),
        "scale_factor": InputSpec(
            shape=[None, 2], name='scale_factor')
    }]
    static_model = paddle.jit.to_static(model, input_spec=input_spec)
    # NOTE: dy2st do not pruned program, but jit.save will prune program
    # input spec, prune input spec here and save with pruned input spec
    pruned_input_spec = _prune_input_spec(
        input_spec,
        static_model.forward.main_program,
        static_model.forward.outputs
    )
    # 保存模型
    paddle.jit.save(
        static_model,
        os.path.join(save_dir, 'model'),
        input_spec=pruned_input_spec
    )


def _prune_input_spec(input_spec, program, targets):
    # try to prune static program to figure out pruned input spec
    # so we perform following operations in static mode
    paddle.enable_static()
    pruned_input_spec = [{}]
    program = program.clone()
    program = program._prune(targets=targets)
    global_block = program.global_block()
    for name, spec in input_spec[0].items():
        try:
            v = global_block.var(name)
            pruned_input_spec[0][name] = spec
        except Exception:
            pass
    paddle.disable_static()
    return pruned_input_spec
代码编写完成后,执行 tools/export_model.py
进行测试,静态图模型会被保存至 output_inference/retinanet_r50_fpn_1x_coco
(具体保存位置根据自己编写的程序而定)。
# 安装所需依赖
!pip install -r requirements.txt
!python tools/export_model.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=/home/aistudio/data/data104154/best_model.pdparams
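导出完成后,可以用下面的小片段简单验证静态图模型能否被正常加载。这只是一个示意,路径前缀以实际导出目录为准:
# 验证导出的静态图模型是否可以被加载(示意,路径前缀以实际导出目录为准)
import paddle

loaded = paddle.jit.load('output_inference/retinanet_r50_fpn_1x_coco/model')
loaded.eval()
print(type(loaded))  # 能正常加载并打印 TranslatedLayer,说明导出成功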
模型的 Inference
这里可以使用 argparse 来接收命令行启动时传入的参数。关于这部分以及更多可视化、视频推理和预处理 operator 的编写,可以参考 deploy/python/infer.py
和 deploy/python/preprocess.py 两个文件。
import os
import yaml
import numpy as np
import math
from functools import reduce

import paddle
from paddle.inference import Config
from paddle.inference import create_predictor

# RetinaNet-Based-on-PPdet-main/deploy/python/utils.py
# import 一些参数的设置、时间计算类、内存使用计算类
from utils import argsparser, Timer, get_current_memory_mb
# 预处理函数 preprocess 以及 Resize 等预处理 op 可从上文提到的 deploy/python/preprocess.py 中 import
class Detector(object):
    def __init__(self,
                 pred_config,
                 model_dir,
                 device='CPU',
                 run_mode='fluid',
                 batch_size=1,
                 trt_min_shape=1,
                 trt_max_shape=1280,
                 trt_opt_shape=640,
                 trt_calib_mode=False,
                 cpu_threads=1,
                 enable_mkldnn=False):
        self.pred_config = pred_config
        # 初始化 predictor,主要进行一些 cpu\gpu、mkldnn\TensorRT 等相关设置
        self.predictor, self.config = load_predictor(
            model_dir,
            run_mode=run_mode,
            batch_size=batch_size,
            min_subgraph_size=self.pred_config.min_subgraph_size,
            device=device,
            use_dynamic_shape=self.pred_config.use_dynamic_shape,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
            trt_calib_mode=trt_calib_mode,
            cpu_threads=cpu_threads,
            enable_mkldnn=enable_mkldnn)
        # 这两个成员分别用来记录时间和内存占用
        self.det_times = Timer()
        self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0

    # 预处理函数
    def preprocess(self, image_list):
        # 这里主要是对预处理 operator 进行设置
        # 如果预处理固定,也可以参考以下 link,直接 Compose 在一起就行了
        # https://github.com/littletomatodonkey/AlexNet-Prod/blob/tipc/pipeline/Step5/AlexNet_paddle/deploy/inference_python/infer.py#L48
        preprocess_ops = []
        for op_info in self.pred_config.preprocess_infos:
            new_op_info = op_info.copy()
            op_type = new_op_info.pop('type')
            preprocess_ops.append(eval(op_type)(**new_op_info))
        input_im_lst = []
        input_im_info_lst = []
        for im_path in image_list:
            im, im_info = preprocess(im_path, preprocess_ops)
            input_im_lst.append(im)
            input_im_info_lst.append(im_info)
        # 将预处理后的输入转换为模型需要的格式(根据自己的模型来,也许不需要这个函数)
        inputs = create_inputs(input_im_lst, input_im_info_lst)
        return inputs
    # 后处理
    def postprocess(self,
                    np_boxes,
                    np_masks,
                    inputs,
                    np_boxes_num,
                    threshold=0.5):
        # postprocess output of predictor
        results = {}
        results['boxes'] = np_boxes
        results['boxes_num'] = np_boxes_num
        if np_masks is not None:
            results['masks'] = np_masks
        return results

    def predict(self, image_list, threshold=0.5, warmup=0, repeats=1):
        # 前处理获得模型需要的输入
        self.det_times.preprocess_time_s.start()
        inputs = self.preprocess(image_list)
        self.det_times.preprocess_time_s.end()
        np_boxes, np_masks = None, None
        # 获取输入的 name 并拷贝输入数据
        input_names = self.predictor.get_input_names()
        for i in range(len(input_names)):
            input_tensor = self.predictor.get_input_handle(input_names[i])
            input_tensor.copy_from_cpu(inputs[input_names[i]])
        # warmup 阶段,不计入推理时间
        for i in range(warmup):
            self.predictor.run()
            output_names = self.predictor.get_output_names()
            boxes_tensor = self.predictor.get_output_handle(output_names[0])
            np_boxes = boxes_tensor.copy_to_cpu()
            if self.pred_config.mask:
                masks_tensor = self.predictor.get_output_handle(output_names[2])
                np_masks = masks_tensor.copy_to_cpu()
        # 正式推理并统计时间
        self.det_times.inference_time_s.start()
        for i in range(repeats):
            self.predictor.run()
            output_names = self.predictor.get_output_names()
            boxes_tensor = self.predictor.get_output_handle(output_names[0])
            np_boxes = boxes_tensor.copy_to_cpu()
            boxes_num = self.predictor.get_output_handle(output_names[1])
            np_boxes_num = boxes_num.copy_to_cpu()
            if self.pred_config.mask:
                masks_tensor = self.predictor.get_output_handle(output_names[2])
                np_masks = masks_tensor.copy_to_cpu()
        self.det_times.inference_time_s.end(repeats=repeats)
        self.det_times.postprocess_time_s.start()
        results = []
        # 经过后处理返回结果
        if reduce(lambda x, y: x * y, np_boxes.shape) < 6:
            print('[WARNING] No object detected.')
            results = {'boxes': np.array([[]]), 'boxes_num': [0]}
        else:
            results = self.postprocess(
                np_boxes, np_masks, inputs, np_boxes_num, threshold=threshold)
        self.det_times.postprocess_time_s.end()
        self.det_times.img_num += len(image_list)
        return results

    def get_timer(self):
        return self.det_times
def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   device='CPU',
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640,
                   trt_calib_mode=False,
                   cpu_threads=1,
                   enable_mkldnn=False):
    if device != 'GPU' and run_mode != 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
            .format(run_mode, device))
    # 利用 inference 的 api 将模型加载进来
    config = Config(
        os.path.join(model_dir, 'model.pdmodel'),
        os.path.join(model_dir, 'model.pdiparams')
    )
    if device == 'GPU':
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)
    elif device == 'XPU':
        config.enable_xpu(10 * 1024 * 1024)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception as e:
                print(
                    "The current environment does not support `mkldnn`, so disable mkldnn."
                )
                pass
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(
            workspace_size=1 << 10,
            max_batch_size=batch_size,
            min_subgraph_size=min_subgraph_size,
            precision_mode=precision_map[run_mode],
            use_static=False,
            use_calib_mode=trt_calib_mode)
        if use_dynamic_shape:
            min_input_shape = {
                'image': [batch_size, 3, trt_min_shape, trt_min_shape]
            }
            max_input_shape = {
                'image': [batch_size, 3, trt_max_shape, trt_max_shape]
            }
            opt_input_shape = {
                'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]
            }
            config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')
    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, config
'''
主要分为 bs 是否等于 1 两种情况处理,bs 大于 1 时考虑到可能要 padding。
另外把需要的字段及相关信息放到一个字典里,然后返回。
'''
def create_inputs(imgs, im_info):
    inputs = {}
    im_shape = []
    scale_factor = []
    if len(imgs) == 1:
        inputs['image'] = np.array((imgs[0], )).astype('float32')
        inputs['im_shape'] = np.array(
            (im_info[0]['im_shape'], )).astype('float32')
        inputs['scale_factor'] = np.array(
            (im_info[0]['scale_factor'], )).astype('float32')
        return inputs

    for e in im_info:
        im_shape.append(np.array((e['im_shape'], )).astype('float32'))
        scale_factor.append(np.array((e['scale_factor'], )).astype('float32'))
    inputs['im_shape'] = np.concatenate(im_shape, axis=0)
    inputs['scale_factor'] = np.concatenate(scale_factor, axis=0)
    # bs 大于 1 时,把一个 batch 内的图片 padding 到相同大小再 stack
    imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs]
    max_shape_h = max([e[0] for e in imgs_shape])
    max_shape_w = max([e[1] for e in imgs_shape])
    padding_imgs = []
    for img in imgs:
        im_c, im_h, im_w = img.shape[:]
        padding_im = np.zeros(
            (im_c, max_shape_h, max_shape_w), dtype=np.float32)
        padding_im[:, :im_h, :im_w] = img
        padding_imgs.append(padding_im)
    inputs['image'] = np.stack(padding_imgs, axis=0)
    return inputs
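把上面的 Detector、load_predictor 和 create_inputs 串起来,一个简化的 main 流程大致如下。注意这只是一个示意:其中的 PredictConfig(解析导出目录下 infer_cfg.yml 的类)以及 --image_file、--threshold 等参数,均假设在 deploy/python 的相应文件中已有实现,具体以 deploy/python/infer.py 为准:
# 简化的 main 流程示意(PredictConfig、FLAGS 的各字段均为假设,完整实现见 deploy/python/infer.py)
if __name__ == '__main__':
    parser = argsparser()
    FLAGS = parser.parse_args()
    # 解析导出目录下的 infer_cfg.yml,得到预处理、label 等配置
    pred_config = PredictConfig(FLAGS.model_dir)
    detector = Detector(
        pred_config,
        FLAGS.model_dir,
        device=FLAGS.device,
        run_mode=FLAGS.run_mode,
        batch_size=FLAGS.batch_size,
        cpu_threads=FLAGS.cpu_threads,
        enable_mkldnn=FLAGS.enable_mkldnn)
    # 对单张图片进行预测,results['boxes'] 每行为 [class_id, score, x1, y1, x2, y2]
    results = detector.predict([FLAGS.image_file], threshold=FLAGS.threshold)
    print(results['boxes'][:5])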
可以用编写好的 inference 代码加载刚才导出的静态图模型,对图片进行测试:
!python ./deploy/python/infer.py --device=gpu --model_dir=output_inference/retinanet_r50_fpn_1x_coco --batch_size=1 --image_dir=demo
结果的可视化会被保存至 ./output。
关于 txt 命令配置文件和 shell 脚本的编写
- txt 命令配置文件
shell 脚本是按行号读取 txt 文件中的信息的,两者逐行对应。比如,shell 脚本约定从 txt 文件的第 4 行(从 0 开始计数)读取是否使用 gpu,此时你如果把 use_gpu:True
写到了第 6 行,那么 shell 这边读到的就是错误的信息。因此,不要随意添加或者删除行;如果必须添加或删除行,则需要同时修改对应的 shell 脚本。
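这种「按行取值、按冒号切分」的约定可以用下面的 Python 小片段来示意。它仅用于说明行号对应关系,并非 TIPC 源码,实际解析由 shell 脚本中的 func_parser_key/func_parser_value 完成:
# 示意:按行号读取 txt 配置,并按冒号切分出 key/value
def parse_line(txt_path, line_idx):
    with open(txt_path) as f:
        lines = f.read().splitlines()
    key, _, value = lines[line_idx].partition(':')
    return key, value

# 第 4 行(从 0 开始计数)应当读到 use_gpu
key, value = parse_line(
    'tipc/train_infer_python/configs/retinanet/retinanet_r50_fpn_1x_coco.txt', 4)
print(key, value)  # 期望得到 use_gpu True(txt 中的行内注释删掉之后)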
以下注释在正式使用的时候可以删除(注意占行的双 # 号不要删除)。
===========================train_params===========================
model_name:retinanet_r50_fpn_1x_coco # 模型名称一般用作储存输出的文件夹名字
python:python3.7
gpu_list:0 # 一般设置为 0,暂不支持多 gpu
use_gpu:True # 是否使用 gpu,如果除了 gpu 还要测试 cpu,则写为 True|False
auto_cast:null|amp # 是否测试半精度
epoch:lite_train_lite_infer=2|lite_train_whole_infer=1|whole_train_whole_infer=12
save_dir:tipc/train_infer_python/output/retinanet_r50 # output 保存的地址
TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_train_whole_infer=1
pretrain_weights:/home/aistudio/data/data104154/best_model.pdparams # 预训练权重位置,也可以放最优权重
train_model_name:model_final.pdparams # train 保存的权重名称
train_infer_img_dir:./dataset/coco/test2017/ # 数据集位置
filename:retinanet_r50_fpn_1x_coco
##
trainer:norm_train # 这里只对 norm_train 模式验证就好了
norm_train:tools/train.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o # 这里是运行 train 的命令行 一般需要修改的是 -c 后面的配置文件位置
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o # 这里是运行 eval 的命令行
null:null
##
===========================infer_params===========================
--output_dir:./output_infer/python/retinanet_r50 # inference 的输出位置
weights:/home/aistudio/data/data104154/best_model.pdparams # 进行 inference 载入的最优权重
norm_export:tools/export_model.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o # 静态图模型导出的命令行
pact_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
kl_quant_export:null
##
infer_mode:norm
infer_quant:False
inference:./deploy/python/infer.py # infer 脚本的位置
--device:gpu|cpu # 对 gpu、cpu 都进行测试
--enable_mkldnn:True|False # cpu 测试时是否使用 mkldnn
--cpu_threads:1|4 # 测试不同线程
--batch_size:1
--use_tensorrt:null # gpu 测试时是否使用 tensorrt
--run_mode:fluid # 这里可以对 tensorrt 的精度进行设置,本次不要求
--model_dir:tipc/train_infer_python/output/retinanet_r50/norm_train_gpus_0_autocast_null/retinanet_r50_fpn_1x_coco # 进行 inference 载入的权重
--image_dir:./dataset/coco/test2017/ # inference 需要的图片
--save_log_path:null
--run_benchmark:False
--trt_max_shape:1600
对于 inference 中运行 infer.py 所配置的其他参数,如 device、enable_mkldnn 等,会传入前面代码中的 load_predictor
函数,用于对 predictor 进行初始化。
- 关于 shell 脚本文件,其实没有太多需要修改的地方,主要是根据自己编写的 txt 文件和 py 脚本所需要的参数进行调整。
- 可能需要对自己在 txt 中增加的行进行解析,参照下述前两行;并且更新其后各参数解析所用的行号(因为增加了一行,在它之后的参数解析 $(func_parser_key "${lines[i]}") 中的 i 就应该加 1)。
- 根据自己的 py 脚本需要的参数,增删 args 或 kwargs,如下述命令中的 "${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_filename} ${set_autocast}"。
your_params_key=$(func_parser_key "${lines[4]}")
your_params_value=$(func_parser_value "${lines[4]}")
cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_filename} ${set_autocast}"
模型的轻量化验证
其实如果走通了原模型的 TIPC 验证,那么轻量化模型的 TIPC 验证就很容易了。这里以 mobilenetv1 为例,首先将原模型的 backbone 替换为 mobilenetv1。需要确定的是自己要返回哪几层的特征,这可以通过阅读源码 RetinaNet-Based-on-PPdet-main/ppdet/modeling/backbones/mobilenet_v1.py
了解网络结构得到:这里需要返回的是第 4、6、13 层的特征,这几层的 stride 刚好是 8、16 和 32。预训练权重也得使用 mobilenetv1 的,优化器方面可能也需要修改,其余的都可以沿用原来的配置:
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/retinanet_r50_fpn.yml',
  '_base_/optimizer_1x.yml',
  '_base_/retinanet_reader.yml',
]

pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_pretrained.pdparams

RetinaNet:
  backbone: MobileNet

MobileNet:
  scale: 1
  feature_maps: [4, 6, 13]
  with_extra_blocks: false
  extra_block_filters: []
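如果想确认 feature_maps: [4, 6, 13] 对应的下采样倍率确实是 8、16、32,可以用类似下面的小片段做个简单检查。这只是一个示意,MobileNet 的构造参数和 forward 的输入格式以 ppdet/modeling/backbones/mobilenet_v1.py 的实际实现为准:
# 示意:检查 MobileNet backbone 在 feature_maps=[4, 6, 13] 处输出特征图的空间尺寸
import paddle
from ppdet.modeling.backbones.mobilenet_v1 import MobileNet

backbone = MobileNet(scale=1, feature_maps=[4, 6, 13],
                     with_extra_blocks=False, extra_block_filters=[])
x = paddle.randn([1, 3, 640, 640])
feats = backbone({'image': x})
for feat in feats:
    # 期望空间尺寸分别为 80、40、20,对应 stride 8、16、32
    print(feat.shape)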
然后训练获得最优权重后,将原来的 txt 文件的相关部分修改为 mobilenetv1 的内容(比如最优权重位置换为 mobilenetv1 的),这里两个对比的文件为:
tipc/train_infer_python/configs/retinanet/retinanet_mobilenet_v1_fpn_1x_coco.txt
tipc/train_infer_python/configs/retinanet/retinanet_r50_fpn_1x_coco.txt
♦ 效果体验与展示
更多详细的介绍可以参照文件 tipc/train_infer_python/README.md
- 安装各种依赖和准备数据集
!pip install -r requirements.txt
!bash tipc/train_infer_python/prepare.sh tipc/train_infer_python/configs/retinanet/retinanet_r50_fpn_1x_coco.txt 'lite_train_lite_infer'
- 对 retinanet_r50_fpn_1x_coco 使用少量数据进行训练和推理
!bash tipc/train_infer_python/test_train_inference_python.sh tipc/train_infer_python/configs/retinanet/retinanet_r50_fpn_1x_coco.txt 'lite_train_lite_infer'
如果运行成功,则会输出如下信息:
Run successfully with command - python3.7 tools/train.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o use_gpu=True save_dir=tipc/train_infer_python/output/retinanet_r50/norm_train_gpus_0_autocast_null epoch=2 pretrain_weights=/home/aistudio/data/data104154/best_model.pdparams TrainReader.batch_size=2 filename=retinanet_r50_fpn_1x_coco !
Run successfully with command - python3.7 tools/eval.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=tipc/train_infer_python/output/retinanet_r50/norm_train_gpus_0_autocast_null/retinanet_r50_fpn_1x_coco/model_final.pdparams use_gpu=True !
Run successfully with command - python3.7 tools/export_model.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=tipc/train_infer_python/output/retinanet_r50/norm_train_gpus_0_autocast_null/retinanet_r50_fpn_1x_coco/model_final.pdparams filename=retinanet_r50_fpn_1x_coco --output_dir=./output_infer/python/retinanet_r50 !
Run successfully with command - python3.7 ./deploy/python/infer.py --device=gpu --run_mode=fluid --model_dir=./output_infer/python/retinanet_r50/retinanet_r50_fpn_1x_coco --batch_size=1 --image_dir=./dataset/coco/test2017/ --run_benchmark=False --trt_max_shape=1600 --output_dir=./output_infer/python/retinanet_r50 > tipc/train_infer_python/output/retinanet_r50/python_infer_gpu_precision_fluid_batchsize_1.log 2>&1 !
- 对 retinanet_mobilenet_v1_fpn_1x_coco 使用少量数据进行训练和推理
!bash tipc/train_infer_python/test_train_inference_python.sh tipc/train_infer_python/configs/retinanet/retinanet_mobilenet_v1_fpn_1x_coco.txt 'lite_train_lite_infer'
如果运行成功也会输出上述信息,输出和 log 文件保存至 tipc/train_infer_python/output。
Serving 支持
♣ 简介
serving 部署需要做的事就是在服务端加载好模型,然后通过客户端访问,并且进行推理。这里加载的模型是由静态图模型转换而来的,所以需要像之前一样进行静态图模型的导出。
♠ 具体流程
如果我们已经做过 TIPC 了,那么就已经有该模型的静态图模型了。把静态图模型转换为需要的 serving 模型后,主要工作是编写服务端、客户端以及一些前后处理,最后再把它接入 TIPC,也就是像之前一样写一个 shell 脚本和对应的 txt 文件。
环境的准备
首先,安装一些 serving 需要的包,检查一下自己的环境,并选择对应的版本安装:
!nvidia-smi
Tue Jan 25 09:56:06 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67 Driver Version: 418.67 CUDA Version: 10.1 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 Tesla V100-SXM2... On | 00000000:05:00.0 Off | 0 |
| N/A 34C P0 40W / 300W | 0MiB / 32480MiB | 0% Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
# 下载包
!wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl
!wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.7.0-cp37-none-any.whl
!wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-0.7.0-py3-none-any.whl
# 安装包
!pip install paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl
!pip install paddle_serving_client-0.7.0-cp37-none-any.whl
!pip install paddle_serving_app-0.7.0-py3-none-any.whl
使用下述命令就可以将静态图模型转换为 serving 需要的模型:
'''
--dirname          静态图模型保存目录
--model_filename   静态图模型文件名
--params_filename  静态图模型参数文件名
--serving_server   服务端文件保存目录,这里为 deploy/serving/serving_server
--serving_client   客户端文件保存目录,这里为 deploy/serving/serving_client
'''
!python3 -m paddle_serving_client.convert --dirname output_inference/retinanet_r50_fpn_1x_coco --model_filename model.pdmodel --params_filename model.pdiparams --serving_server deploy/serving/serving_server --serving_client deploy/serving/serving_client
输出模型可以在目录 deploy/serving
找到。
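转换完成后,可以查看 serving_server 目录下生成的 serving_server_conf.prototxt,确认模型的 feed/fetch 变量名,后面服务端 preprocess/postprocess 的字段名需要与之对应:
# 示意:打印转换得到的配置文件,查看 feed_var / fetch_var 的名称与 shape
with open('deploy/serving/serving_server/serving_server_conf.prototxt') as f:
    print(f.read())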
服务端及其配置文件
服务端主要需要编写输入预处理和结果后处理,可以参考以下代码:
class RetinaNetOp(Op):
    def init_op(self):
        # 这里 compose 一些需要的预处理操作
        self.eval_transforms = Compose([
            Resize(target_size=[800, 1333]),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            Permute(),
            PadStride(32)
        ])

    def preprocess(self, input_dicts, data_id, log_id):
        # 这里其实主要针对 bs 为 1 的情况
        (_, input_dict), = input_dicts.items()
        batch_size = len(input_dict.keys())
        imgs = []
        imgs_info = {'im_shape': [], 'scale_factor': []}
        for key in input_dict.keys():
            # 对传入进来的数据进行解码
            data = base64.b64decode(input_dict[key].encode('utf8'))
            img = cv2.imdecode(np.frombuffer(data, np.uint8), cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # 添加模型需要的字段
            im_info = {
                'scale_factor': np.array([1., 1.], dtype=np.float32),
                'im_shape': img.shape[:2],
            }
            # 进行数据增强和预处理
            img, im_info = self.eval_transforms(img, im_info)
            imgs.append(img[np.newaxis, :].copy())
            imgs_info["im_shape"].append(im_info["im_shape"][np.newaxis, :].copy())
            imgs_info["scale_factor"].append(im_info["scale_factor"][np.newaxis, :].copy())
        input_imgs = np.concatenate(imgs, axis=0)
        input_im_shape = np.concatenate(imgs_info["im_shape"], axis=0)
        input_scale_factor = np.concatenate(imgs_info["scale_factor"], axis=0)
        # 最后的 return 只需要管第一个,它的内容就是你的模型推理需要的输入
        return {"image": input_imgs, "im_shape": input_im_shape, "scale_factor": input_scale_factor}, False, None, ""

    def postprocess(self, input_dicts, fetch_dict, data_id, log_id):
        # 输出字段可以查看 deploy/serving/serving_server/serving_server_conf.prototxt(由静态图模型转换得到)
        np_boxes = list(fetch_dict.values())[0]
        # 这里是已经经过 nms 之后的输出,按照这个进行后处理就好了
        keep = (np_boxes[:, 1] > 0.5) & (np_boxes[:, 0] > -1)
        np_boxes = np_boxes[keep, :]
        result = {"class_id": [], "confidence": [], "left_top": [], "right_bottom": []}
        for dt in np_boxes:
            clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
            xmin, ymin, xmax, ymax = bbox
            result["class_id"].append(clsid)
            result["confidence"].append(score)
            result["left_top"].append([xmin, ymin])
            result["right_bottom"].append([xmax, ymax])
        result["class_id"] = str(result["class_id"])
        result["confidence"] = str(result["confidence"])
        result["left_top"] = str(result["left_top"])
        result["right_bottom"] = str(result["right_bottom"])
        return result, None, ""


# 其余的都是固定的操作
class RetinaNetService(WebService):
    def get_pipeline_response(self, read_op):
        retinanet_op = RetinaNetOp(name="retinanet", input_ops=[read_op])
        return retinanet_op


# define the service class
uci_service = RetinaNetService(name="retinanet")
# load config and prepare the service
uci_service.prepare_pipeline_config("config.yml")
# start the service
uci_service.run_service()
关于服务端配置文件,需要编写的内容不多:
op:
    # op 名称,与 web_service 中的 Service 类初始化 name 参数一致
    retinanet:
        # 并发数,is_thread_op=True 时为线程并发;否则为进程并发
        concurrency: 1
        # 当 op 配置没有 server_endpoints 时,从 local_service_conf 读取本地服务配置
        local_service_conf:
            # serving 模型导出的位置
            model_config: "./serving_server"
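可以用下面的小片段读取 config.yml,确认 op 名称和 model_config 等配置是否正确。字段结构以实际的 deploy/serving/config.yml 为准:
# 示意:读取并检查 serving 的配置文件
import yaml

with open('deploy/serving/config.yml') as f:
    conf = yaml.safe_load(f)
# op 名称需要与 RetinaNetOp 初始化时的 name 一致
print(conf['op']['retinanet']['local_service_conf']['model_config'])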
具体详情可以参考:
deploy/serving/web_service.py
deploy/serving/config.yml
关于预处理算子的编写,可以参考 deploy/serving/preprocess_ops.py,根据自己的需要进行添加。
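上面 init_op 中用到的 Compose 大致做的事情如下。这里只是一个最小示意,假设每个预处理 op 的调用接口都是 (img, im_info) -> (img, im_info),具体实现以 deploy/serving/preprocess_ops.py 为准:
# 最小示意:把若干预处理 op 按顺序串起来
class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, im_info):
        # 依次调用每个预处理 op,不断更新 img 和 im_info
        for t in self.transforms:
            img, im_info = t(img, im_info)
        return img, im_info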
这里需要去终端运行 python3 deploy/serving/web_service.py &。注意,如果之前已经运行过,需要先把旧的服务进程 kill 掉;另外此时代码和配置中的所有路径需要改为绝对路径,而后面接入 tipc 时使用的是相对路径。
客户端的编写
客户端就更简单了,主要是指定要访问哪个模型服务、测试哪张图片,具体如下:
import base64
import json

import requests


def get_args(add_help=True):
    import argparse
    parser = argparse.ArgumentParser(
        description='Paddle Serving', add_help=add_help)
    # 需要测试的图片
    parser.add_argument('--img_path', default="dataset/coco/test2017/000000575930.jpg")
    args = parser.parse_args()
    return args


# 对输入进行编码
def cv2_to_base64(image):
    return base64.b64encode(image).decode('utf8')


def main(args):
    # 访问的 url,注意改为你的模型名称
    url = "http://127.0.0.1:18080/retinanet/prediction"
    logid = 10000
    img_path = args.img_path
    with open(img_path, 'rb') as file:
        image_data1 = file.read()
    # data should be transformed to the base64 format
    image = cv2_to_base64(image_data1)
    data = {"key": ["image"], "value": [image], "logid": logid}
    # send requests
    r = requests.post(url=url, data=json.dumps(data))
    print(r.json())


if __name__ == "__main__":
    args = get_args()
    main(args)
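服务端返回的 value 是字符串形式的列表(见后文的输出示例),如果想在客户端继续做可视化等处理,可以像下面这样把它解析回 Python 对象。这只是一个示意,字段名以实际返回结果为准:
# 示意:把 pipeline 返回的字符串形式的 value 解析回 Python 对象,便于后续可视化等处理
import ast

def parse_pipeline_result(ret):
    # ret 即上面 main() 中 r.json() 的返回值
    return {k: ast.literal_eval(v) for k, v in zip(ret['key'], ret['value'])}

# 用法示意:
# result = parse_pipeline_result(r.json())
# print(result['class_id'], result['confidence'])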
同样可以对编写好的客户端进行测试:按照上述方法启动服务端后,使用 ctrl c 回到命令行,然后在终端输入命令运行客户端 python3 deploy/serving/pipeline_http_client.py。
接入 TIPC
serving 接入 TIPC 没有之前那么复杂,需要改的东西很少,需要注意的点和之前也一样,主要是 txt 文件的编写:
===========================serving_params===========================
model_name:RetinaNet
python:python3.7
trans_model:-m paddle_serving_client.convert
--dirname:output_infer/python/retinanet_r50/retinanet_r50_fpn_1x_coco # 静态图模型的位置,用于导出 serving model
--model_filename:model.pdmodel
--params_filename:model.pdiparams
--serving_server:deploy/serving/serving_server # server 端模型导出的位置
--serving_client:deploy/serving/serving_client # client 端模型导出的位置
serving_dir:./deploy/serving
web_service:web_service.py
op.alexnet.local_service_conf.devices:0
null:null
null:null
null:null
null:null
pipline:pipeline_http_client.py
--img_path:../../dataset/coco/test2017/000000575930.jpg # 需要测试的图片
♦ 效果体验与展示
更多详细的介绍以及环境配置,可以参照文件 tipc/serving/README.md
,首先安装依赖:
!pip install -r requirements.txt
进行测试
!bash tipc/serving/test_serving.sh tipc/serving/configs/retinanet_r50_fpn_1x_coco.txt
################### run test ###################
/home/aistudio/deploy/serving
2022/01/25 11:23:14 start proxy service
W0125 11:23:18.311997 6867 analysis_predictor.cc:795] The one-time configuration of analysis predictor failed, which may be due to native predictor called first and its configurations taken effect.
I0125 11:23:18.441354 6867 analysis_predictor.cc:665] ir_optim is turned off, no IR pass will be executed
--- Running analysis [ir_graph_build_pass]
--- Running analysis [ir_graph_clean_pass]
--- Running analysis [ir_analysis_pass]
--- Running analysis [ir_params_sync_among_devices_pass]
I0125 11:23:18.746263 6867 ir_params_sync_among_devices_pass.cc:45] Sync params from CPU to GPU
--- Running analysis [adjust_cudnn_workspace_size_pass]
--- Running analysis [inference_op_replace_pass]
--- Running analysis [memory_optimize_pass]
I0125 11:23:18.907073 6867 memory_optimize_pass.cc:216] Cluster name : reshape2_35.tmp_1 size: 0
I0125 11:23:18.907119 6867 memory_optimize_pass.cc:216] Cluster name : fill_constant_43.tmp_0 size: 4
I0125 11:23:18.907122 6867 memory_optimize_pass.cc:216] Cluster name : fill_constant_41.tmp_0 size: 4
I0125 11:23:18.907131 6867 memory_optimize_pass.cc:216] Cluster name : im_shape size: 8
I0125 11:23:18.907136 6867 memory_optimize_pass.cc:216] Cluster name : scale_factor size: 8
I0125 11:23:18.907138 6867 memory_optimize_pass.cc:216] Cluster name : image size: 12
I0125 11:23:18.907143 6867 memory_optimize_pass.cc:216] Cluster name : conv2d_181.tmp_1 size: 144
I0125 11:23:18.907147 6867 memory_optimize_pass.cc:216] Cluster name : batch_norm_52.tmp_3 size: 8192
I0125 11:23:18.907155 6867 memory_optimize_pass.cc:216] Cluster name : relu_39.tmp_0 size: 4096
I0125 11:23:18.907160 6867 memory_optimize_pass.cc:216] Cluster name : conv2d_123.tmp_0 size: 8192
I0125 11:23:18.907163 6867 memory_optimize_pass.cc:216] Cluster name : batch_norm_49.tmp_1 size: 8192
I0125 11:23:18.907166 6867 memory_optimize_pass.cc:216] Cluster name : conv2d_161.tmp_1 size: 144
I0125 11:23:18.907169 6867 memory_optimize_pass.cc:216] Cluster name : conv2d_171.tmp_1 size: 144
I0125 11:23:18.907172 6867 memory_optimize_pass.cc:216] Cluster name : relu_45.tmp_0 size: 8192
I0125 11:23:18.907176 6867 memory_optimize_pass.cc:216] Cluster name : elementwise_add_15 size: 8192
I0125 11:23:18.907179 6867 memory_optimize_pass.cc:216] Cluster name : reshape2_28.tmp_0 size: 320
I0125 11:23:18.907183 6867 memory_optimize_pass.cc:216] Cluster name : conv2d_141.tmp_1 size: 144
I0125 11:23:18.907186 6867 memory_optimize_pass.cc:216] Cluster name : relu_21.tmp_0 size: 2048
I0125 11:23:18.907189 6867 memory_optimize_pass.cc:216] Cluster name : relu_88.tmp_0 size: 1024
I0125 11:23:18.907197 6867 memory_optimize_pass.cc:216] Cluster name : conv2d_151.tmp_1 size: 144
--- Running analysis [ir_graph_to_program_pass]
I0125 11:23:19.583788 6867 analysis_predictor.cc:714] ======= optimize end =======
I0125 11:23:19.620709 6867 naive_executor.cc:98] --- skip [feed], feed -> scale_factor
I0125 11:23:19.620766 6867 naive_executor.cc:98] --- skip [feed], feed -> image
I0125 11:23:19.620771 6867 naive_executor.cc:98] --- skip [feed], feed -> im_shape
I0125 11:23:19.632345 6867 naive_executor.cc:98] --- skip [_generated_var_22], fetch -> fetch
I0125 11:23:19.632387 6867 naive_executor.cc:98] --- skip [_generated_var_23], fetch -> fetch
W0125 11:23:19.708725 6867 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 10.1, Runtime API Version: 10.1
W0125 11:23:19.712651 6867 device_context.cc:465] device: 0, cuDNN Version: 7.6.
{'err_no': 0, 'err_msg': '', 'key': ['class_id', 'confidence', 'left_top', 'right_bottom'], 'value': ['[0, 27, 39, 39, 39, 39, 48, 48]', '[0.9298271, 0.78884697, 0.609955, 0.56487834, 0.56370527, 0.5328276, 0.6830632, 0.67401433]', '[[288.6603, 9.321735], [412.90067, 172.55153], [539.206, 3.6034787], [557.5477, 4.5205536], [521.5307, 4.789155], [571.5571, 0.0], [15.654112, 242.51068], [202.25995, 197.21396]]', '[[638.92633, 390.8219], [477.95944, 296.9499], [559.34314, 64.882324], [572.8147, 40.891556], [538.74994, 67.22812], [583.3348, 42.266556], [213.04216, 322.14337], [368.59772, 320.33978]]'], 'tensors': []}
Run successfully with command - python3.7 pipeline_http_client.py --img_path=../../dataset/coco/test2017/000000575930.jpg> ../../tipc/serving/output/server_infer_gpu_pipeline_http_usetrt_null_precision_null_batchsize_1.log 2>&1!
具体输出保存在 tipc/serving/output
,可以将 serving 的输出:
{'err_no': 0, 'err_msg': '', 'key': ['class_id', 'confidence', 'left_top', 'right_bottom'], 'value': ['[0, 27, 39, 39, 39, 39, 48, 48]', '[0.9298271, 0.78884697, 0.609955, 0.56487834, 0.56370527, 0.5328276, 0.6830632, 0.67401433]', '[[288.6603, 9.321735], [412.90067, 172.55153], [539.206, 3.6034787], [557.5477, 4.5205536], [521.5307, 4.789155], [571.5571, 0.0], [15.654112, 242.51068], [202.25995, 197.21396]]', '[[638.92633, 390.8219], [477.95944, 296.9499], [559.34314, 64.882324], [572.8147, 40.891556], [538.74994, 67.22812], [583.3348, 42.266556], [213.04216, 322.14337], [368.59772, 320.33978]]'], 'tensors': []}
和 inference 输出进行对比
class_id:0, confidence:0.9298, left_top:[288.66,9.32],right_bottom:[638.93,390.82]
class_id:27, confidence:0.7888, left_top:[412.90,172.55],right_bottom:[477.96,296.95]
class_id:39, confidence:0.6100, left_top:[539.21,3.60],right_bottom:[559.34,64.88]
class_id:39, confidence:0.5649, left_top:[557.55,4.52],right_bottom:[572.81,40.89]
class_id:39, confidence:0.5637, left_top:[521.53,4.79],right_bottom:[538.75,67.23]
class_id:39, confidence:0.5328, left_top:[571.56,0.00],right_bottom:[583.33,42.27]
class_id:48, confidence:0.6831, left_top:[15.65,242.51],right_bottom:[213.04,322.14]
class_id:48, confidence:0.6740, left_top:[202.26,197.21],right_bottom:[368.60,320.34]
完全一致🚀🚀🚀,至此完结 🌸🌸🌸