Basic idea: after reading Jin Dong (金东)'s Zhihu posts, I was impressed by the detection speed and quality. Our business needs instance segmentation, and the model has to be ported to an embedded development board, so I used the few days of the National Day holiday to work through a couple of examples and deploy them.
1. Download the official code and test it
First, set up the environment:
git clone https://github.com/facebookresearch/detectron2.git
cd detectron2
# if you switch to a specific version, e.g., v0.3 (recommended) or v0.6
git checkout tags/v0.6
# build detectron2
python setup.py build develop --user
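A quick sanity check (my addition, not from the original post) that the build is importable:
# confirm detectron2 built and installed correctly
import detectron2
print(detectron2.__version__)  # should print 0.6 for the tag checked out above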
Then run a quick test:
ubuntu@ubuntu:~$ git clone https://github.com/hustvl/SparseInst.git
ubuntu@ubuntu:~/SparseInst$ python demo.py --config-file configs/sparse_inst_r50_giam.yaml --input /home/ubuntu/Downloads/rknn-toolkit-1.7.1/examples/onnx/yolov5/bus.jpg --output results --opts MODEL.WEIGHTS sparse_inst_r50_giam_ceaffc.pth
Test result:
2. The official repo provides a script for converting to ONNX
ubuntu@ubuntu:~/SparseInst$ python3 onnx/convert_onnx.py --config-file configs/sparse_inst_r50_giam.yaml --width 640 --height 640 --output output/sparseinst.onnx --opts MODEL.WEIGHTS sparse_inst_r50_giam_ceaffc.pth
I modified the official onnx/convert_onnx.py script as follows (it also runs the exported model with onnxruntime and visualizes the result):
import math
import argparse
import onnxruntime
import torch
from torch import nn
from torch.nn import functional as F
from detectron2.layers import Conv2d
from detectron2.utils.logger import setup_logger
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from sparseinst import add_sparse_inst_config
class PyramidPoolingModuleONNX(nn.Module):

    def __init__(self, in_channels, channels, input_size, pool_sizes=(1, 2, 3, 6)):
        super().__init__()
        self.stages = []
        self.stages = nn.ModuleList(
            [self._make_stage(in_channels, channels, input_size, pool_size)
             for pool_size in pool_sizes]
        )
        self.bottleneck = Conv2d(
            in_channels + len(pool_sizes) * channels, in_channels, 1)

    def _make_stage(self, features, out_features, input_size, pool_size):
        stride_y = math.floor((input_size[0] / pool_size))
        stride_x = math.floor((input_size[1] / pool_size))
        kernel_y = input_size[0] - (pool_size - 1) * stride_y
        kernel_x = input_size[1] - (pool_size - 1) * stride_x
        prior = nn.AvgPool2d(kernel_size=(
            kernel_y, kernel_x), stride=(stride_y, stride_x))
        conv = Conv2d(features, out_features, 1)
        return nn.Sequential(prior, conv)

    def forward(self, feats):
        h, w = feats.size(2), feats.size(3)
        priors = [F.interpolate(
            input=F.relu_(stage(feats)), size=(h, w), mode='bilinear',
            align_corners=False) for stage in self.stages] + [feats]
        out = F.relu_(self.bottleneck(torch.cat(priors, 1)))
        return out
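# Worked example (an illustration I added, not part of the original script) of why
# the pooling above uses an explicit kernel/stride instead of AdaptiveAvgPool2d:
# adaptive pooling does not export cleanly to ONNX, so it is rebuilt for a fixed
# input size. With a 640x640 input the PPM receives a 20x20 feature map (stride 32);
# for pool_size=6 this gives stride = floor(20/6) = 3 and kernel = 20 - (6-1)*3 = 5,
# so the pooled output is floor((20-5)/3)+1 = 6, i.e. the same 6x6 map that
# AdaptiveAvgPool2d(6) would produce at this resolution.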
def main():
    parser = argparse.ArgumentParser(
        description="Export model to the onnx format")
    parser.add_argument(
        "--config-file",
        default="configs/sparse_inst_r50_giam.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument('--width', default=640, type=int)
    parser.add_argument('--height', default=640, type=int)
    parser.add_argument('--level', default=0, type=int)
    parser.add_argument(
        "--output",
        default="output/sparseinst.onnx",
        metavar="FILE",
        help="path to the output onnx file",
    )
    parser.add_argument(
        "--opts",
        help="Modify config options using the command-line 'KEY VALUE' pairs",
        default=[],
        nargs=argparse.REMAINDER,
    )

    cfg = get_cfg()
    add_sparse_inst_config(cfg)
    args = parser.parse_args()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # norm for ONNX: change FrozenBN back to BN
    cfg.MODEL.BACKBONE.FREEZE_AT = 0
    cfg.MODEL.RESNETS.NORM = "BN"
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    logger = setup_logger(output=output_dir)
    logger.info(cfg)

    height = args.height
    width = args.width

    model = build_model(cfg)
    num_channels = cfg.MODEL.SPARSE_INST.ENCODER.NUM_CHANNELS
    onnx_ppm = PyramidPoolingModuleONNX(
        num_channels, num_channels // 4, (height // 32, width // 32))
    model.encoder.ppm = onnx_ppm
    model.to(cfg.MODEL.DEVICE)
    logger.info("Model:\n{}".format(model))

    checkpointer = DetectionCheckpointer(model)
    _ = checkpointer.load(cfg.MODEL.WEIGHTS)
    logger.info("load Model:\n{}".format(cfg.MODEL.WEIGHTS))

    input_names = ["input_image"]
    dummy_input = torch.zeros((1, 3, height, width))

    from PIL import Image
    import cv2
    source_img = cv2.imread("../0.jpeg")
    bgr = cv2.cvtColor(source_img, cv2.COLOR_BGR2RGB)
    # max_size=853
    # short_edge_length=640
    # oldh,oldw=source_img.shape[0],source_img.shape[1]
    # h, w = oldh, oldw
    # size = short_edge_length * 1.0
    # scale = size / min(h, w)
    # if h < w:
    #     newh, neww = size, scale * w
    # else:
    #     newh, neww = scale * h, size
    # if max(newh, neww) > max_size:
    #     scale = max_size * 1.0 / max(newh, neww)
    #     newh = newh * scale
    #     neww = neww * scale
    # neww = int(neww + 0.5)
    # newh = int(newh + 0.5)
    rgb = cv2.resize(bgr, (640, 640))
    # image_sizes=[neww,newh]
    # max_size=[0,0]
    # size_divisibility=32
    # if size_divisibility > 1:
    #     stride = size_divisibility
    #     # the last two dims are H,W, both subject to divisibility requirement
    #     max_size[0] = (image_sizes[0] + (stride - 1)) // stride * stride
    #     max_size[1] = (image_sizes[1] + (stride - 1)) // stride * stride
    #
    #     padding_size = [0, max_size[0] - image_sizes[0], 0, max_size[1] - image_sizes[1]]
    #     rgb = cv2.copyMakeBorder(rgb, padding_size[0], padding_size[2], padding_size[1], padding_size[3], cv2.BORDER_CONSTANT, (0, 0, 0))
    import numpy as np
    image = np.transpose(rgb, (2, 0, 1)).astype(np.float32)  # chw rgb
    image[0, ...] = (image[0, ...] - 123.6750) / 58.3950
    image[1, ...] = (image[1, ...] - 116.2800) / 57.1200
    image[2, ...] = (image[2, ...] - 103.5300) / 57.3750
    from torch.autograd import Variable
    now_image1 = Variable(torch.from_numpy(image).to(cfg.MODEL.DEVICE))
    dummy_input = now_image1.unsqueeze(0)
    # dummy_input = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)).to(cfg.MODEL.DEVICE)
    # dummy_input = dummy_input.unsqueeze(0)

    output_names = ["scores", "masks"]
    model.forward = model.forward_test
    torch.onnx.export(
        model,
        dummy_input,
        args.output,
        verbose=True,
        input_names=input_names,
        output_names=output_names,
        keep_initializers_as_inputs=False,
        opset_version=12,
    )
    import onnxsim
    import onnx
    # Checks
    model_onnx = onnx.load(args.output)  # load onnx model
    onnx.checker.check_model(model_onnx)  # check onnx model
    print(f'Simplifying with onnx-simplifier {onnxsim.__version__}.')
    model_onnx, check = onnxsim.simplify(
        model_onnx,
        dynamic_input_shape=False)
    assert check, 'assert check failed'
    onnx.save(model_onnx, args.output)

    from onnxruntime.datasets import get_example
    example_model = get_example(args.output)
    session = onnxruntime.InferenceSession(example_model)
    # get the name of the first input of the model
    input_name = session.get_inputs()[0].name
    # print('onnx Input Name:', input_name)
    pred_scores, pred_masks = session.run([], {input_name: dummy_input.data.cpu().numpy()})
    pred_masks = torch.as_tensor(pred_masks)
    pred_scores = torch.as_tensor(pred_scores)

    image_sizes = [(640, 640)]
    max_shape = torch.Size([640, 640])
    batched_inputs = dummy_input
    logger.info("Done. The onnx model is saved into {}.".format(args.output))

    predictions = []
    for _, (scores_per_image, mask_pred_per_image, batched_input, img_shape) in enumerate(zip(
            pred_scores, pred_masks, batched_inputs, image_sizes)):
        ori_shape = (source_img.shape[0], source_img.shape[1])
        from detectron2.structures import ImageList, Instances, BitMasks
        result = Instances(ori_shape)
        # max/argmax
        scores, labels = scores_per_image.max(dim=-1)
        # cls threshold
        keep = scores > 0.005
        scores = scores[keep]
        labels = labels[keep]
        mask_pred_per_image = mask_pred_per_image[keep]
        if scores.size(0) == 0:
            result.scores = scores
            result.pred_classes = labels
            predictions.append(result)
            continue

        h, w = img_shape

        # rescoring mask using maskness
        def rescoring_mask(scores, mask_pred, masks):
            mask_pred_ = mask_pred.float()
            return scores * ((masks * mask_pred_).sum([1, 2]) / (mask_pred_.sum([1, 2]) + 1e-6))

        scores = rescoring_mask(
            scores, mask_pred_per_image > 0.45, mask_pred_per_image)

        # upsample the masks to the original resolution:
        # (1) upsampling the masks to the padded inputs, remove the padding area
        # (2) upsampling/downsampling the masks to the original sizes
        mask_pred_per_image = F.interpolate(
            mask_pred_per_image.unsqueeze(1), size=max_shape, mode="bilinear",
            align_corners=False)[:, :, :h, :w]
        mask_pred_per_image = F.interpolate(
            mask_pred_per_image, size=ori_shape, mode='bilinear',
            align_corners=False).squeeze(1)
        mask_pred = mask_pred_per_image > 0.45
        # fix the bug for visualization
        # mask_pred = BitMasks(mask_pred)
        # using Detectron2 Instances to store the final results
        result.pred_masks = mask_pred
        result.scores = scores
        result.pred_classes = labels
        predictions.append(result)

    processed_results = [{"instances": r} for r in predictions]

    from detectron2.utils.visualizer import ColorMode, Visualizer
    instance_mode = ColorMode.IMAGE
    from detectron2.data import MetadataCatalog
    metadata = MetadataCatalog.get(
        cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
    )
    visualizer = Visualizer(bgr, metadata, instance_mode=instance_mode)
    if "instances" in processed_results[0]:
        instances = processed_results[0]["instances"].to(cfg.MODEL.DEVICE)
        instances = instances[instances.scores > 0.5]
        processed_results[0]["instances"] = instances
        vis_output = visualizer.draw_instance_predictions(
            predictions=instances.to("cpu"))
        vis_output.save("fuck.jpg")


if __name__ == "__main__":
    main()
At the same time, in sparseinst.py, comment out the normalization line in forward_test:
def forward_test(self, images):
    # for inference, onnx, tensorrt
    # input images: BxCxHxW, fixed, need padding size
    # normalize
    # images = (images - self.pixel_mean[None]) / self.pixel_std[None]  # commented out
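Because the in-model normalization is commented out, every deployment runtime (ONNX Runtime, MNN, RKNN, NCNN) now has to normalize the input itself. A minimal sketch of that preprocessing, reusing the mean/std and the 640x640 RGB resize from the export script above (the helper name is my own):
import numpy as np

# RGB mean/std, same values as in convert_onnx.py above
PIXEL_MEAN = np.array([123.675, 116.28, 103.53], dtype=np.float32)
PIXEL_STD = np.array([58.395, 57.12, 57.375], dtype=np.float32)

def preprocess(rgb):
    """HWC uint8 RGB image (already resized to 640x640) -> 1x3x640x640 float32."""
    chw = rgb.astype(np.float32).transpose(2, 0, 1)
    chw = (chw - PIXEL_MEAN[:, None, None]) / PIXEL_STD[:, None, None]
    return chw[None]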
To convert to an MNN model, change opset_version to 9, otherwise the conversion fails on relu6.
@Echosanmao
I have hit a similar error with pytorch -> onnx -> mnn; notes below:
relu6 is exported as the Clip op in ONNX;
torch.onnx.export has an opset_version setting, and the exported Clip is described differently depending on it: with opset_version=9 it can be converted back to relu6, while opset_version=11 fails;
the difference is in how the max value is provided: in opset 9 it is an attribute, in 11 it is an input, and MNN can currently only parse the attribute form.
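For reference, the choice is made purely at export time. A sketch of exporting both variants (the file names are illustrative; model and dummy_input are built exactly as in convert_onnx.py above):
# export twice with different opsets; only the Clip/ReLU6 representation changes
model.forward = model.forward_test
torch.onnx.export(model, dummy_input, "output/sparseinst_opt9.onnx",
                  input_names=["input_image"], output_names=["scores", "masks"],
                  opset_version=9)   # Clip keeps min/max as attributes -> MNN maps it to relu6
torch.onnx.export(model, dummy_input, "output/sparseinst_opt12.onnx",
                  input_names=["input_image"], output_names=["scores", "masks"],
                  opset_version=12)  # opset >= 11: Clip min/max become extra inputs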
There is still a problem: with identical pre- and post-processing, the opset 12 ONNX model gives better inference results than the opset 9 one, so the MNN model (converted from opset 9) still performs worse; it may also be that my preprocessing is too crude.
ONNX opset 12:
ONNX opset 9:
Actual test image:
Convert the simplified model to NCNN, MNN, and RKNN.
3. Convert to an MNN model
ubuntu@ubuntu:~/MNN/build$ ./MNNConvert -f ONNX --modelFile /home/ubuntu/SparseInst/output/sparseinst.onnx --MNNModel /home/ubuntu/SparseInst/output/sparseinst_sim.mnn --bizCode MNN
Start to Convert Other Model Format To MNN Model...
[09:44:22] /home/ubuntu/MNN/tools/converter/source/onnx/onnxConverter.cpp:40: ONNX Model ir version: 6
Start to Optimize the MNN Net...
inputTensors : [ input_image, ]
outputTensors: [ masks, scores, ]
CMakeLists.txt
cmake_minimum_required(VERSION 3.16)
project(SparseInstClion)
set(CMAKE_CXX_FLAGS "-std=c++11")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
set(CMAKE_CXX_STANDARD 11)
include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/include/MNN)
find_package(OpenCV REQUIRED)
#message(STATUS ${OpenCV_INCLUDE_DIRS})
# add header directories
include_directories(${OpenCV_INCLUDE_DIRS})
# import the MNN library and link it together with OpenCV
add_library(libmnn SHARED IMPORTED)
set_target_properties(libmnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libMNN.so)
add_executable(SparseInstClion main.cpp )
target_link_libraries(SparseInstClion ${OpenCV_LIBS} libmnn )
main.cpp
#include <iostream>
#include <algorithm>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include<MNN/Interpreter.hpp>
#include<MNN/ImageProcess.hpp>
using namespace std;
using namespace cv;
int main(int argc, char **argv) {
    const char *classes[] = {
            "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
            "boat", "traffic-light", "fire-hydrant", "stop-sign", "parking-meter", "bench",
            "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
            "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
            "snowboard", "sports-ball", "kite", "baseball-bat", "baseball-glove", "skateboard",
            "surfboard", "tennis-racket", "bottle", "wine-glass", "cup", "fork", "knife",
            "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
            "hot-dog", "pizza", "donut", "cake", "chair", "sofa", "potted-plant", "bed",
            "dining-table", "toilet", "tv-monitor", "laptop", "mouse", "remote", "keyboard",
            "cell-phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book",
            "clock", "vase", "scissors", "teddy-bear", "hair-drier", "toothbrush"
    };
    static const unsigned char colors[81][3] = {
            {56, 0, 255},{226, 255, 0},{0, 94, 255},{0, 37, 255},{0, 255, 94},{255, 226, 0},{0, 18, 255},{255, 151, 0},{170, 0, 255},{0, 255, 56},{255, 0, 75},{0, 75, 255},
            {0, 255, 169},{255, 0, 207},{75, 255, 0},{207, 0, 255},{37, 0, 255},{0, 207, 255},{94, 0, 255},{0, 255, 113},{255, 18, 0},{255, 0, 56},{18, 0, 255},{0, 255, 226},
            {170, 255, 0},{255, 0, 245},{151, 255, 0},{132, 255, 0},{75, 0, 255},{151, 0, 255},{0, 151, 255},{132, 0, 255},
            {0, 255, 245},{255, 132, 0},{226, 0, 255},{255, 37, 0},{207, 255, 0},{0, 255, 207},{94, 255, 0},{0, 226, 255},{56, 255, 0},{255, 94, 0},
            {255, 113, 0},{0, 132, 255},{255, 0, 132},{255, 170, 0},{255, 0, 188},{113, 255, 0},{245, 0, 255},{113, 0, 255},{255, 188, 0},{0, 113, 255},
            {255, 0, 0},{0, 56, 255},{255, 0, 113},{0, 255, 188},{255, 0, 94},{255, 0, 18},{18, 255, 0},{0, 255, 132},{0, 188, 255},{0, 245, 255},{0, 169, 255},{37, 255, 0},
            {255, 0, 151},{188, 0, 255},{0, 255, 37},{0, 255, 0},{255, 0, 170},{255, 0, 37},{255, 75, 0},{0, 0, 255},{255, 207, 0},
            {255, 0, 226},{255, 245, 0},{188, 255, 0},{0, 255, 18},{0, 255, 75},{0, 255, 151},{255, 56, 0},{245, 255, 0}
    };
    // note: the preprocessing here is not quite the same as the original Python code, which affects the results below
    cv::Mat bgr = cv::imread("/home/ubuntu/Downloads/rknn-toolkit-1.7.1/examples/onnx/yolov5/bus.jpg");
    int target_size = 640;
    cv::Mat resize_img;
    cv::resize(bgr, resize_img, cv::Size(target_size, target_size));
    float cls_threshold = 0.005;

    // MNN inference
    auto mnnNet = std::shared_ptr<MNN::Interpreter>(
            MNN::Interpreter::createFromFile("/home/ubuntu/SparseInst/output/sparseinst_sim.mnn"));
    auto t1 = std::chrono::steady_clock::now();
    MNN::ScheduleConfig netConfig;
    netConfig.type = MNN_FORWARD_CPU;
    netConfig.numThread = 4;
    auto session = mnnNet->createSession(netConfig);
    auto input = mnnNet->getSessionInput(session, nullptr);
    mnnNet->resizeTensor(input, {1, 3, (int) target_size, (int) target_size});
    mnnNet->resizeSession(session);

    MNN::CV::ImageProcess::Config config;
    const float mean_vals[3] = {123.6750f, 116.2800f, 103.5300f};
    const float norm_255[3] = {1.f / 58.3950, 1.f / 57.1200, 1.f / 57.3750};
    std::shared_ptr<MNN::CV::ImageProcess> pretreat(
            MNN::CV::ImageProcess::create(MNN::CV::BGR, MNN::CV::RGB, mean_vals, 3,
                                          norm_255, 3));
    pretreat->convert(resize_img.data, (int) target_size, (int) target_size, resize_img.step[0], input);
    mnnNet->runSession(session);

    auto SparseInst_scores = mnnNet->getSessionOutput(session, "scores");
    MNN::Tensor scoresHost(SparseInst_scores, SparseInst_scores->getDimensionType());
    SparseInst_scores->copyToHostTensor(&scoresHost);
    //
    // std::vector<float> vec_host_scores;
    // for (int i = 0; i < scoresHost.elementSize(); i++) {
    //     vec_host_scores.emplace_back(scoresHost.host<float>()[i]);
    // }
    auto t2 = std::chrono::steady_clock::now();
    // elapsed time in milliseconds
    double dr_ms = std::chrono::duration<double, std::milli>(t2 - t1).count();
    std::cout << dr_ms << " ms" << std::endl;

    std::vector<float> vec_scores;
    std::vector<float> vec_new_scores;
    std::vector<int> vec_labels;
    int scoresHost_shape_c = scoresHost.channel();
    int scoresHost_shape_d = scoresHost.dimensions();
    int scoresHost_shape_w = scoresHost.width();
    int scoresHost_shape_h = scoresHost.height();
    printf("shape_d=%d shape_c=%d shape_h=%d shape_w=%d scoresHost.elementSize()=%d\n", scoresHost_shape_d,
           scoresHost_shape_c, scoresHost_shape_h, scoresHost_shape_w, scoresHost.elementSize());

    auto SparseInst_masks = mnnNet->getSessionOutput(session, "masks");
    MNN::Tensor masksHost(SparseInst_masks, SparseInst_masks->getDimensionType());
    SparseInst_masks->copyToHostTensor(&masksHost);
    // std::vector<float> vec_host_masks;
    // for (int i = 0; i < masksHost.elementSize(); i++) {
    //     vec_host_masks.emplace_back(masksHost.host<float>()[i]);
    // }
    int masksHost_shape_c = masksHost.channel();
    int masksHost_shape_d = masksHost.dimensions();
    int masksHost_shape_w = masksHost.width();
    int masksHost_shape_h = masksHost.height();
    printf("shape_d=%d shape_c=%d shape_h=%d shape_w=%d masksHost.elementSize()=%d\n", masksHost_shape_d,
           masksHost_shape_c, masksHost_shape_h, masksHost_shape_w, masksHost.elementSize());

    std::vector<std::vector<int>> mask_pred;
    std::vector<std::vector<int>> mask_pred_per_image;
    for (int i = 0; i < scoresHost_shape_c; i++) {
        std::vector<float> item_score;
        std::vector<int> item_mask;
        std::vector<float> mask_pred_dot;
        for (int j = 0; j < scoresHost_shape_h; j++) {
            item_score.emplace_back(scoresHost.host<float>()[i * scoresHost_shape_h + j]);
        }
        float mask_value_0 = 0;
        float mask_value_1 = 0;
        for (int m = 0; m < masksHost_shape_h; m++) {
            for (int n = 0; n < masksHost_shape_w; n++) {
                float mask_value = masksHost.host<float>()[i * masksHost_shape_h * masksHost_shape_w +
                                                           m * masksHost_shape_w + n];
                int value_bl = mask_value > 0.45 ? 1 : 0;
                item_mask.emplace_back(value_bl);
                mask_value_0 = mask_value_0 + mask_value * value_bl;
                mask_value_1 = mask_value_1 + mask_value + 1e-6;
            }
        }
        float value = *max_element(item_score.begin(), item_score.end());
        auto valueIter = max_element(item_score.begin(), item_score.end());
        int index = distance(item_score.begin(), valueIter);
        if (value > cls_threshold) {
            vec_scores.emplace_back(value);
            vec_labels.emplace_back(index);
            mask_pred_per_image.emplace_back(item_mask);  // mask_pred = mask_pred_per_image > 0.45
            vec_new_scores.emplace_back(value * mask_value_0 / mask_value_1);
        }
    }
    if (vec_new_scores.size()) {
        for (int i = 0; i < vec_new_scores.size(); i++) {
            float score = vec_new_scores[i];
            if (score > 0.5) {
                // rows/cols are interchangeable here because the input was resized to a square 640x640
                cv::Mat gray_img = cv::Mat(resize_img.cols, resize_img.rows, CV_8UC3, cv::Scalar(255, 255, 255));
                int labelid = vec_labels[i];
                std::string label = classes[labelid];
                int startx = resize_img.cols;
                int starty = resize_img.rows;
                int endx = -1;
                int endy = -1;
                for (int m = 0; m < resize_img.cols; m++) {
                    uchar *p = resize_img.ptr(m);
                    uchar *q = gray_img.ptr(m);
                    for (int n = 0; n < resize_img.rows; n++) {
                        if (mask_pred_per_image[i][m * resize_img.rows + n]) {
                            startx = std::min(startx, m);
                            starty = std::min(starty, n);
                            endx = std::max(endx, m);
                            endy = std::max(endy, n);
                            p[0] = cv::saturate_cast<uchar>(p[0] * 0.5 + colors[i][0] * 0.5);
                            p[1] = cv::saturate_cast<uchar>(p[1] * 0.5 + colors[i][1] * 0.5);
                            p[2] = cv::saturate_cast<uchar>(p[2] * 0.5 + colors[i][2] * 0.5);
                            q[0] = cv::saturate_cast<uchar>(0);
                            q[1] = cv::saturate_cast<uchar>(0);
                            q[2] = cv::saturate_cast<uchar>(0);
                        }
                        p += 3;
                        q += 3;
                    }
                }
                cvtColor(gray_img, gray_img, cv::COLOR_BGR2GRAY);
                threshold(gray_img, gray_img, 0, 128, cv::THRESH_BINARY_INV);
                // remove isolated specks from the binary mask
                cv::Mat kernel_ = getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(5, 5));
                cv::morphologyEx(gray_img, gray_img, cv::MORPH_CLOSE, kernel_, cv::Point(-1, -1), 2);
                std::vector<std::vector<cv::Point>> contours;
                findContours(gray_img, contours, cv::RETR_TREE, cv::CHAIN_APPROX_SIMPLE);
                // rough drawing of the contour geometry
                vector<vector<Point>> contours_ploy(contours.size()); // approximated polygons
                vector<Rect> ploy_rects(contours.size());             // bounding rectangles
                vector<Point2f> ccs(contours.size());                 // enclosing-circle centers
                vector<float> radius(contours.size());                // enclosing-circle radii
                vector<RotatedRect> minRects(contours.size());
                vector<RotatedRect> myeliipse(contours.size());
                for (size_t t = 0; t < contours.size(); t++) {
                    approxPolyDP(Mat(contours[t]), contours_ploy[t], 3, true);
                    ploy_rects[t] = boundingRect(contours_ploy[t]);
                    minEnclosingCircle(contours_ploy[t], ccs[t], radius[t]);
                    if (contours_ploy[t].size() > 5) {
                        myeliipse[t] = fitEllipse(contours_ploy[t]);
                        minRects[t] = minAreaRect(contours_ploy[t]);
                    }
                }
                cv::Mat drawImg;
                gray_img.copyTo(drawImg);
                RNG rng(1234);
                Point2f pts[4];
                for (size_t t = 0; t < contours.size(); t++) {
                    Scalar color = Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));
                    rectangle(drawImg, ploy_rects[t], color, 2, 8);
                    circle(drawImg, ccs[t], radius[t], color, 2, 8);
                    if (contours_ploy[t].size() > 5) {
                        ellipse(drawImg, myeliipse[t], color, 1, 8);
                        minRects[t].points(pts);
                        for (int r = 0; r < 4; r++) {
                            line(drawImg, pts[r], pts[(r + 1) % 4], color, 2, 8);
                        }
                    }
                }
                imshow("drawImg", drawImg);
                cv::waitKey(0);
                // everything above can be deleted; it only makes the label placement look nicer
                int centerx = ccs[0].x;
                int centery = ccs[0].y;
                char text[256];
                sprintf(text, "%s %.1f%%", label.c_str(), score * 100);
                cv::putText(resize_img, text, cv::Point(centerx, centery),
                            cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(colors[i][0], colors[i][1], colors[i][2]), 1, cv::LINE_AA);
            }
        }
    }
    cv::resize(resize_img, bgr, cv::Size(bgr.cols, bgr.rows)); // use resize in place of the original down/upsampling steps
    cv::imwrite("result.jpg", bgr);
    cv::imshow("image.jpg", bgr);
    cv::waitKey(0);
    mnnNet->releaseModel();
    mnnNet->releaseSession(session);
    return 0;
}
Test result:
The script for converting to an RKNN model is below; dataset.txt is a plain-text list of calibration image paths, one per line (use several images).
from rknn.api import RKNN
ONNX_MODEL = '/home/ubuntu/SparseInst/output/sparseinst_opt12.onnx'
RKNN_MODEL = '/home/ubuntu/SparseInst/output/sparseinst_opt12.rknn'
if __name__ == '__main__':
    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], reorder_channel='0 1 2',
                target_platform='rk3399pro',
                quantized_dtype='asymmetric_affine-u8', batch_size=1, optimization_level=3, output_optimize=1)
    print('done')

    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=True, dataset='dataset.txt')  # , pre_compile=True
    if ret != 0:
        print('Build sparseinst_opt12 failed!')
        exit(ret)
    print('done')

    # Export rknn model
    print('--> Export RKNN model')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export sparseinst_opt12.rknn failed!')
        exit(ret)
    print('done')

    rknn.release()
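One thing worth double-checking (my assumption, not something verified here): the config above normalizes inputs with mean 0 / std 255, while the exported ONNX model, with forward_test's normalization commented out, was fed ImageNet mean/std during export. If the quantization input distribution does not match, accuracy can drop. A variant that mirrors the export preprocessing would look like this:
# hypothetical alternative: reuse the mean/std from convert_onnx.py so the quantized
# model sees the same input statistics as the ONNX export did
rknn.config(mean_values=[[123.675, 116.28, 103.53]],
            std_values=[[58.395, 57.12, 57.375]],
            reorder_channel='0 1 2',
            target_platform='rk3399pro',
            quantized_dtype='asymmetric_affine-u8',
            batch_size=1, optimization_level=3, output_optimize=1)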
The generated model files:
ubuntu@ubuntu:~/SparseInst/output$ ls
sparseinst.onnx sparseinst_sim.onnx sparseinst_sim.bin sparseinst_sim.param sparseinst_sim.bin sparseinst_sim.rknn
sparse_inst_r50_giam sparseinst_sim.mnn
Test code:
import os
import urllib
import traceback
import time
from torch.nn import functional as F
import cv2
from rknn.api import RKNN
import torch
RKNN_MODEL = "sparseinst_opt12.rknn"
IMG_PATH = "img.png"
target_size=640
QUANTIZE_ON = True
def postprocess(src_img, pred_scores, pred_masks):
    pred_masks = torch.as_tensor(pred_masks)
    pred_scores = torch.as_tensor(pred_scores)
    image_sizes = [(target_size, target_size)]
    max_shape = torch.Size([target_size, target_size])
    pred_masks_list, scores_list, pred_classes_list = [], [], []
    for _, (scores_per_image, mask_pred_per_image, img_shape) in enumerate(zip(
            pred_scores, pred_masks, image_sizes)):
        ori_shape = (src_img.shape[0], src_img.shape[1])
        # max/argmax
        scores, labels = scores_per_image.max(dim=-1)
        # cls threshold
        keep = scores > 0.005  # 0.005
        scores = scores[keep]
        labels = labels[keep]
        mask_pred_per_image = mask_pred_per_image[keep]
        if scores.size(0) == 0:
            continue
        h, w = img_shape

        # rescoring mask using maskness
        def rescoring_mask(scores, mask_pred, masks):
            mask_pred_ = mask_pred.float()
            return scores * ((masks * mask_pred_).sum([1, 2]) / (mask_pred_.sum([1, 2]) + 1e-6))

        scores = rescoring_mask(
            scores, mask_pred_per_image > 0.45, mask_pred_per_image)
        # upsample the masks to the original resolution:
        # (1) upsampling the masks to the padded inputs, remove the padding area
        # (2) upsampling/downsampling the masks to the original sizes
        mask_pred_per_image = F.interpolate(
            mask_pred_per_image.unsqueeze(1), size=max_shape, mode="bilinear", align_corners=False)[:, :, :h, :w]
        mask_pred_per_image = F.interpolate(
            mask_pred_per_image, size=ori_shape, mode='bilinear', align_corners=False).squeeze(1)
        mask_pred = mask_pred_per_image > 0.45
        pred_masks_list.append(mask_pred)
        scores_list.append(scores)
        pred_classes_list.append(labels)

    real_score = []
    real_mask = []
    real_class = []
    for mask_item, score_item, classes_item in zip(pred_masks_list, scores_list, pred_classes_list):
        for idx, item in enumerate(score_item.tolist()):
            if item > 0.5:
                real_score.append(item)
                real_mask.append(mask_item.tolist()[idx])
                real_class.append(classes_item.tolist()[idx])
    print(real_score, real_class)

#
# def get_output_shape(oldh, oldw, short_edge_length, max_size):
#     """
#     Compute the output size given input size and target short edge length.
#     """
#     h, w = oldh, oldw
#     size = short_edge_length * 1.0
#     scale = size / min(h, w)
#     if h < w:
#         newh, neww = size, scale * w
#     else:
#         newh, neww = scale * h, size
#     if max(newh, neww) > max_size:
#         scale = max_size * 1.0 / max(newh, neww)
#         newh = newh * scale
#         neww = neww * scale
#     neww = int(neww + 0.5)
#     newh = int(newh + 0.5)
#     return (newh, neww)
if __name__ == "__main__":
    # Create RKNN object
    rknn = RKNN()
    if not os.path.exists(RKNN_MODEL):
        print("model not exist")
        exit(-1)

    # Load RKNN model
    print("--> Loading model")
    ret = rknn.load_rknn(RKNN_MODEL)
    if ret != 0:
        print("Load rknn model failed!")
        exit(ret)
    print("done")

    # init runtime environment
    print("--> Init runtime environment")
    ret = rknn.init_runtime()
    if ret != 0:
        print("Init runtime environment failed")
        exit(ret)
    print("done")

    target = (target_size, target_size)
    # size_divisibility=32
    src_img = cv2.imread(IMG_PATH)
    # oldh=src_img.cols
    # oldw=src_img.rows
    # short_edge_length=32
    # max_size=853
    # max_size=get_output_shape(oldh, oldw, short_edge_length, max_size)
    #
    # if size_divisibility > 1:
    #     stride = size_divisibility
    #     # the last two dims are H,W, both subject to divisibility requirement
    #     max_size = (max_size + (stride - 1)) // stride * stride
    #     borderType = cv2.BORDER_REPLICATE
    #     dst2 = cv2.copyMakeBorder(src, top, bottom, left, right, borderType, None, value)
    res_img = cv2.resize(src_img, target)
    img = cv2.cvtColor(res_img, cv2.COLOR_BGR2RGB)  # hwc rgb

    print("--> Running model")
    start = time.clock()
    score, mask = rknn.inference(inputs=[img])
    # get the end time
    postprocess(src_img, score, mask)
    end = time.clock()
    # compute the elapsed time
    runTime = end - start
    runTime_ms = runTime * 1000
    # print the elapsed time
    print("Runtime:", runTime_ms, "ms")
    rknn.release()
The accuracy of the Python RKNN test is quite low; it may be my own problem and still needs investigation.
/home/ubuntu/miniconda3/envs/rknnpy36/bin/python /home/ubuntu/PycharmProjects/pythonProject2/infer.py
--> Loading model
done
--> Init runtime environment
librknn_runtime version 1.7.3 (5047ff8 build: 2022-08-13 12:11:22 base: 1131)
done
--> Running model
[0.5580335259437561, 0.6662011742591858] [29, 16]
Runtime: 595974.0290000001 ms
Process finished with exit code 0
Verified working; code to be posted.
C++
Verified working; code to be posted.
NCNN test results are still being verified.