Basic idea: after reading Jin Dong (金东)'s Zhihu posts, I was impressed by the detection speed and quality. Our business needs instance segmentation, and the model has to be ported to an embedded development board, so I used the few days of the National Day holiday to work through a couple of examples and deploy them.
1. Download the official code and test it
First, set up the environment:
git clone https://github.com/facebookresearch/detectron2.git
cd detectron2
# if you switch to a specific version, e.g., v0.3 (recommended) or v0.6
git checkout tags/v0.6
# build detectron2
python setup.py build develop --user
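A quick sanity check (my addition, not from the original post) that the build is importable:
# confirm detectron2 built and installed correctly
import detectron2
print(detectron2.__version__)  # should print 0.6 for the tag checked out above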
Then run a quick test:
ubuntu@ubuntu:~$ git clone https://github.com/hustvl/SparseInst.git
ubuntu@ubuntu:~/SparseInst$ python demo.py --config-file configs/sparse_inst_r50_giam.yaml --input /home/ubuntu/Downloads/rknn-toolkit-1.7.1/examples/onnx/yolov5/bus.jpg --output results --opts MODEL.WEIGHTS sparse_inst_r50_giam_ceaffc.pth
Test result:
2. The official repo provides a script for converting to ONNX
ubuntu@ubuntu:~/SparseInst$ python3 onnx/convert_onnx.py --config-file configs/sparse_inst_r50_giam.yaml --width 640 --height 640 --output output/sparseinst.onnx --opts MODEL.WEIGHTS sparse_inst_r50_giam_ceaffc.pth
I modified the official onnx/convert_onnx.py script as follows (it also runs the exported model with onnxruntime and visualizes the result):
import math
import argparse
import onnxruntime
import torch
from torch import nn
from torch.nn import functional as F
from detectron2.layers import Conv2d
from detectron2.utils.logger import setup_logger
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from sparseinst import add_sparse_inst_config
class PyramidPoolingModuleONNX(nn.Module):

    def __init__(self, in_channels, channels, input_size, pool_sizes=(1, 2, 3, 6)):
        super().__init__()
        self.stages = []
        self.stages = nn.ModuleList(
            [self._make_stage(in_channels, channels, input_size, pool_size)
             for pool_size in pool_sizes]
        )
        self.bottleneck = Conv2d(
            in_channels + len(pool_sizes) * channels, in_channels, 1)

    def _make_stage(self, features, out_features, input_size, pool_size):
        stride_y = math.floor((input_size[0] / pool_size))
        stride_x = math.floor((input_size[1] / pool_size))
        kernel_y = input_size[0] - (pool_size - 1) * stride_y
        kernel_x = input_size[1] - (pool_size - 1) * stride_x
        prior = nn.AvgPool2d(kernel_size=(
            kernel_y, kernel_x), stride=(stride_y, stride_x))
        conv = Conv2d(features, out_features, 1)
        return nn.Sequential(prior, conv)

    def forward(self, feats):
        h, w = feats.size(2), feats.size(3)
        priors = [F.interpolate(
            input=F.relu_(stage(feats)), size=(h, w), mode='bilinear',
            align_corners=False) for stage in self.stages] + [feats]
        out = F.relu_(self.bottleneck(torch.cat(priors, 1)))
        return out
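# Worked example (an illustration I added, not part of the original script) of why
# the pooling above uses an explicit kernel/stride instead of AdaptiveAvgPool2d:
# adaptive pooling does not export cleanly to ONNX, so it is rebuilt for a fixed
# input size. With a 640x640 input the PPM receives a 20x20 feature map (stride 32);
# for pool_size=6 this gives stride = floor(20/6) = 3 and kernel = 20 - (6-1)*3 = 5,
# so the pooled output is floor((20-5)/3)+1 = 6, i.e. the same 6x6 map that
# AdaptiveAvgPool2d(6) would produce at this resolution.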
def main():
    parser = argparse.ArgumentParser(
        description="Export model to the onnx format")
    parser.add_argument(
        "--config-file",
        default="configs/sparse_inst_r50_giam.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument('--width', default=640, type=int)
    parser.add_argument('--height', default=640, type=int)
    parser.add_argument('--level', default=0, type=int)
    parser.add_argument(
        "--output",
        default="output/sparseinst.onnx",
        metavar="FILE",
        help="path to the output onnx file",
    )
    parser.add_argument(
        "--opts",
        help="Modify config options using the command-line 'KEY VALUE' pairs",
        default=[],
        nargs=argparse.REMAINDER,
    )

    cfg = get_cfg()
    add_sparse_inst_config(cfg)
    args = parser.parse_args()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # norm for ONNX: change FrozenBN back to BN
    cfg.MODEL.BACKBONE.FREEZE_AT = 0
    cfg.MODEL.RESNETS.NORM = "BN"
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    logger = setup_logger(output=output_dir)
    logger.info(cfg)

    height = args.height
    width = args.width

    model = build_model(cfg)
    num_channels = cfg.MODEL.SPARSE_INST.ENCODER.NUM_CHANNELS
    onnx_ppm = PyramidPoolingModuleONNX(
        num_channels, num_channels // 4, (height // 32, width // 32))
    model.encoder.ppm = onnx_ppm
    model.to(cfg.MODEL.DEVICE)
    logger.info("Model:\n{}".format(model))

    checkpointer = DetectionCheckpointer(model)
    _ = checkpointer.load(cfg.MODEL.WEIGHTS)
    logger.info("load Model:\n{}".format(cfg.MODEL.WEIGHTS))

    input_names = ["input_image"]
    dummy_input = torch.zeros((1, 3, height, width))

    from PIL import Image
    import cv2
    source_img = cv2.imread("../0.jpeg")
    bgr = cv2.cvtColor(source_img, cv2.COLOR_BGR2RGB)
    # max_size=853
    # short_edge_length=640
    # oldh,oldw=source_img.shape[0],source_img.shape[1]
    # h, w = oldh, oldw
    # size = short_edge_length * 1.0
    # scale = size / min(h, w)
    # if h < w:
    #     newh, neww = size, scale * w
    # else:
    #     newh, neww = scale * h, size
    # if max(newh, neww) > max_size:
    #     scale = max_size * 1.0 / max(newh, neww)
    #     newh = newh * scale
    #     neww = neww * scale
    # neww = int(neww + 0.5)
    # newh = int(newh + 0.5)
    rgb = cv2.resize(bgr, (640, 640))
    # image_sizes=[neww,newh]
    # max_size=[0,0]
    # size_divisibility=32
    # if size_divisibility > 1:
    #     stride = size_divisibility
    #     # the last two dims are H,W, both subject to divisibility requirement
    #     max_size[0] = (image_sizes[0] + (stride - 1)) // stride * stride
    #     max_size[1] = (image_sizes[1] + (stride - 1)) // stride * stride
    #
    #     padding_size = [0, max_size[0] - image_sizes[0], 0, max_size[1] - image_sizes[1]]
    #     rgb = cv2.copyMakeBorder(rgb, padding_size[0], padding_size[2], padding_size[1], padding_size[3], cv2.BORDER_CONSTANT, (0, 0, 0))
    import numpy as np
    image = np.transpose(rgb, (2, 0, 1)).astype(np.float32)  # chw rgb
    image[0, ...] = (image[0, ...] - 123.6750) / 58.3950
    image[1, ...] = (image[1, ...] - 116.2800) / 57.1200
    image[2, ...] = (image[2, ...] - 103.5300) / 57.3750
    from torch.autograd import Variable
    now_image1 = Variable(torch.from_numpy(image).to(cfg.MODEL.DEVICE))
    dummy_input = now_image1.unsqueeze(0)
    # dummy_input = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)).to(cfg.MODEL.DEVICE)
    # dummy_input = dummy_input.unsqueeze(0)

    output_names = ["scores", "masks"]
    model.forward = model.forward_test
    torch.onnx.export(
        model,
        dummy_input,
        args.output,
        verbose=True,
        input_names=input_names,
        output_names=output_names,
        keep_initializers_as_inputs=False,
        opset_version=12,
    )
    import onnxsim
    import onnx
    # Checks
    model_onnx = onnx.load(args.output)  # load onnx model
    onnx.checker.check_model(model_onnx)  # check onnx model
    print(f'Simplifying with onnx-simplifier {onnxsim.__version__}.')
    model_onnx, check = onnxsim.simplify(
        model_onnx,
        dynamic_input_shape=False)
    assert check, 'assert check failed'
    onnx.save(model_onnx, args.output)

    from onnxruntime.datasets import get_example
    example_model = get_example(args.output)
    session = onnxruntime.InferenceSession(example_model)
    # get the name of the first input of the model
    input_name = session.get_inputs()[0].name
    # print('onnx Input Name:', input_name)
    pred_scores, pred_masks = session.run([], {input_name: dummy_input.data.cpu().numpy()})
    pred_masks = torch.as_tensor(pred_masks)
    pred_scores = torch.as_tensor(pred_scores)

    image_sizes = [(640, 640)]
    max_shape = torch.Size([640, 640])
    batched_inputs = dummy_input
    logger.info("Done. The onnx model is saved into {}.".format(args.output))

    predictions = []
    for _, (scores_per_image, mask_pred_per_image, batched_input, img_shape) in enumerate(zip(
            pred_scores, pred_masks, batched_inputs, image_sizes)):
        ori_shape = (source_img.shape[0], source_img.shape[1])
        from detectron2.structures import ImageList, Instances, BitMasks
        result = Instances(ori_shape)
        # max/argmax
        scores, labels = scores_per_image.max(dim=-1)
        # cls threshold
        keep = scores > 0.005
        scores = scores[keep]
        labels = labels[keep]
        mask_pred_per_image = mask_pred_per_image[keep]
        if scores.size(0) == 0:
            result.scores = scores
            result.pred_classes = labels
            predictions.append(result)
            continue

        h, w = img_shape

        # rescoring mask using maskness
        def rescoring_mask(scores, mask_pred, masks):
            mask_pred_ = mask_pred.float()
            return scores * ((masks * mask_pred_).sum([1, 2]) / (mask_pred_.sum([1, 2]) + 1e-6))

        scores = rescoring_mask(
            scores, mask_pred_per_image > 0.45, mask_pred_per_image)

        # upsample the masks to the original resolution:
        # (1) upsampling the masks to the padded inputs, remove the padding area
        # (2) upsampling/downsampling the masks to the original sizes
        mask_pred_per_image = F.interpolate(
            mask_pred_per_image.unsqueeze(1), size=max_shape, mode="bilinear",
            align_corners=False)[:, :, :h, :w]
        mask_pred_per_image = F.interpolate(
            mask_pred_per_image, size=ori_shape, mode='bilinear',
            align_corners=False).squeeze(1)
        mask_pred = mask_pred_per_image > 0.45
        # fix the bug for visualization
        # mask_pred = BitMasks(mask_pred)
        # using Detectron2 Instances to store the final results
        result.pred_masks = mask_pred
        result.scores = scores
        result.pred_classes = labels
        predictions.append(result)

    processed_results = [{"instances": r} for r in predictions]

    from detectron2.utils.visualizer import ColorMode, Visualizer
    instance_mode = ColorMode.IMAGE
    from detectron2.data import MetadataCatalog
    metadata = MetadataCatalog.get(
        cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
    )
    visualizer = Visualizer(bgr, metadata, instance_mode=instance_mode)
    if "instances" in processed_results[0]:
        instances = processed_results[0]["instances"].to(cfg.MODEL.DEVICE)
        instances = instances[instances.scores > 0.5]
        processed_results[0]["instances"] = instances
        vis_output = visualizer.draw_instance_predictions(
            predictions=instances.to("cpu"))
        vis_output.save("fuck.jpg")


if __name__ == "__main__":
    main()
At the same time, in sparseinst.py, comment out the normalization line in forward_test:
def forward_test(self, images):
    # for inference, onnx, tensorrt
    # input images: BxCxHxW, fixed, need padding size
    # normalize
    # images = (images - self.pixel_mean[None]) / self.pixel_std[None]  # commented out
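Because the in-model normalization is commented out, every deployment runtime (ONNX Runtime, MNN, RKNN, NCNN) now has to normalize the input itself. A minimal sketch of that preprocessing, reusing the mean/std and the 640x640 RGB resize from the export script above (the helper name is my own):
import numpy as np

# RGB mean/std, same values as in convert_onnx.py above
PIXEL_MEAN = np.array([123.675, 116.28, 103.53], dtype=np.float32)
PIXEL_STD = np.array([58.395, 57.12, 57.375], dtype=np.float32)

def preprocess(rgb):
    """HWC uint8 RGB image (already resized to 640x640) -> 1x3x640x640 float32."""
    chw = rgb.astype(np.float32).transpose(2, 0, 1)
    chw = (chw - PIXEL_MEAN[:, None, None]) / PIXEL_STD[:, None, None]
    return chw[None]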
To convert to an MNN model, change opset_version to 9, otherwise the conversion fails on relu6.
@Echosanmao
I have hit a similar error with pytorch -> onnx -> mnn; notes below:
relu6 is exported as the Clip op in ONNX;
torch.onnx.export has an opset_version setting, and the exported Clip is described differently depending on it: with opset_version=9 it can be converted back to relu6, while opset_version=11 fails;
the difference is in how the max value is provided: in opset 9 it is an attribute, in 11 it is an input, and MNN can currently only parse the attribute form.
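For reference, the choice is made purely at export time. A sketch of exporting both variants (the file names are illustrative; model and dummy_input are built exactly as in convert_onnx.py above):
# export twice with different opsets; only the Clip/ReLU6 representation changes
model.forward = model.forward_test
torch.onnx.export(model, dummy_input, "output/sparseinst_opt9.onnx",
                  input_names=["input_image"], output_names=["scores", "masks"],
                  opset_version=9)   # Clip keeps min/max as attributes -> MNN maps it to relu6
torch.onnx.export(model, dummy_input, "output/sparseinst_opt12.onnx",
                  input_names=["input_image"], output_names=["scores", "masks"],
                  opset_version=12)  # opset >= 11: Clip min/max become extra inputs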
There is still a problem: with identical pre- and post-processing, the opset 12 ONNX model gives better inference results than the opset 9 one, so the MNN model (converted from opset 9) still performs worse; it may also be that my preprocessing is too crude.
ONNX opset 12:
ONNX opset 9:
Actual test image:
Convert the simplified model to NCNN, MNN, and RKNN.
3. Convert to an MNN model
ubuntu@ubuntu:~/MNN/build$ ./MNNConvert -f ONNX --modelFile /home/ubuntu/SparseInst/output/sparseinst.onnx --MNNModel /home/ubuntu/SparseInst/output/sparseinst_sim.mnn --bizCode MNN
Start to Convert Other Model Format To MNN Model...
[09:44:22] /home/ubuntu/MNN/tools/converter/source/onnx/onnxConverter.cpp:40: ONNX Model ir version: 6
Start to Optimize the MNN Net...
inputTensors : [ input_image, ]
outputTensors: [ masks, scores, ]
CMakeLists.txt
cmake_minimum_required(VERSION 3.16)
project(SparseInstClion)
set(CMAKE_CXX_FLAGS "-std=c++11")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
set(CMAKE_CXX_STANDARD 11)
include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/include/MNN)
find_package(OpenCV REQUIRED)
#message(STATUS ${OpenCV_INCLUDE_DIRS})
# add header directories
include_directories(${OpenCV_INCLUDE_DIRS})
# import the MNN library and link it together with OpenCV
add_library(libmnn SHARED IMPORTED)
set_target_properties(libmnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libMNN.so)
add_executable(SparseInstClion main.cpp )
target_link_libraries(SparseInstClion ${OpenCV_LIBS} libmnn )
main.cpp
#include <iostream>
#include <algorithm>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include<MNN/Interpreter.hpp>
#include<MNN/ImageProcess.hpp>
using namespace std;
using namespace cv;
int main(int argc, char **argv) {
    const char *classes[] = {
            "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
            "boat", "traffic-light", "fire-hydrant", "stop-sign", "parking-meter", "bench",
            "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
            "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
            "snowboard", "sports-ball", "kite", "baseball-bat", "baseball-glove", "skateboard",
            "surfboard", "tennis-racket", "bottle", "wine-glass", "cup", "fork", "knife",
            "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
            "hot-dog", "pizza", "donut", "cake", "chair", "sofa", "potted-plant", "bed",
            "dining-table", "toilet", "tv-monitor", "laptop", "mouse", "remote", "keyboard",
            "cell-phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book",
            "clock", "vase", "scissors", "teddy-bear", "hair-drier", "toothbrush"
    };
    static const unsigned char colors[81][3] = {
            {56, 0, 255},{226, 255, 0},{0, 94, 255},{0, 37, 255},{0, 255, 94},{255, 226, 0},{0, 18, 255},{255, 151, 0},{170, 0, 255},{0, 255, 56},{255, 0, 75},{0, 75, 255},
            {0, 255, 169},{255, 0, 207},{75, 255, 0},{207, 0, 255},{37, 0, 255},{0, 207, 255},{94, 0, 255},{0, 255, 113},{255, 18, 0},{255, 0, 56},{18, 0, 255},{0, 255, 226},
            {170, 255, 0},{255, 0, 245},{151, 255, 0},{132, 255, 0},{75, 0, 255},{151, 0, 255},{0, 151, 255},{132, 0, 255},
            {0, 255, 245},{255, 132, 0},{226, 0, 255},{255, 37, 0},{207, 255, 0},{0, 255, 207},{94, 255, 0},{0, 226, 255},{56, 255, 0},{255, 94, 0},
            {255, 113, 0},{0, 132, 255},{255, 0, 132},{255, 170, 0},{255, 0, 188},{113, 255, 0},{245, 0, 255},{113, 0, 255},{255, 188, 0},{0, 113, 255},
            {255, 0, 0},{0, 56, 255},{255, 0, 113},{0, 255, 188},{255, 0, 94},{255, 0, 18},{18, 255, 0},{0, 255, 132},{0, 188, 255},{0, 245, 255},{0, 169, 255},{37, 255, 0},
            {255, 0, 151},{188, 0, 255},{0, 255, 37},{0, 255, 0},{255, 0, 170},{255, 0, 37},{255, 75, 0},{0, 0, 255},{255, 207, 0},
            {255, 0, 226},{255, 245, 0},{188, 255, 0},{0, 255, 18},{0, 255, 75},{0, 255, 151},{255, 56, 0},{245, 255, 0}
    };
    // note: the preprocessing here is not quite the same as the original Python code, which affects the results below
    cv::Mat bgr = cv::imread("/home/ubuntu/Downloads/rknn-toolkit-1.7.1/examples/onnx/yolov5/bus.jpg");
    int target_size = 640;
    cv::Mat resize_img;
    cv::resize(bgr, resize_img, cv::Size(target_size, target_size));
    float cls_threshold = 0.005;

    // MNN inference
    auto mnnNet = std::shared_ptr<MNN::Interpreter>(
            MNN::Interpreter::createFromFile("/home/ubuntu/SparseInst/output/sparseinst_sim.mnn"));
    auto t1 = std::chrono::steady_clock::now();
    MNN::ScheduleConfig netConfig;
    netConfig.type = MNN_FORWARD_CPU;
    netConfig.numThread = 4;
    auto session = mnnNet->createSession(netConfig);
    auto input = mnnNet->getSessionInput(session, nullptr);
    mnnNet->resizeTensor(input, {1, 3, (int) target_size, (int) target_size});
    mnnNet->resizeSession(session);

    MNN::CV::ImageProcess::Config config;
    const float mean_vals[3] = {123.6750f, 116.2800f, 103.5300f};
    const float norm_255[3] = {1.f / 58.3950, 1.f / 57.1200, 1.f / 57.3750};
    std::shared_ptr<MNN::CV::ImageProcess> pretreat(
            MNN::CV::ImageProcess::create(MNN::CV::BGR, MNN::CV::RGB, mean_vals, 3,
                                          norm_255, 3));
    pretreat->convert(resize_img.data, (int) target_size, (int) target_size, resize_img.step[0], input);
    mnnNet->runSession(session);

    auto SparseInst_scores = mnnNet->getSessionOutput(session, "scores");
    MNN::Tensor scoresHost(SparseInst_scores, SparseInst_scores->getDimensionType());
    SparseInst_scores->copyToHostTensor(&scoresHost);
    //
    // std::vector<float> vec_host_scores;
    // for (int i = 0; i < scoresHost.elementSize(); i++) {
    //     vec_host_scores.emplace_back(scoresHost.host<float>()[i]);
    // }
    auto t2 = std::chrono::steady_clock::now();
    // elapsed time in milliseconds
    double dr_ms = std::chrono::duration<double, std::milli>(t2 - t1).count();
    std::cout << dr_ms << " ms" << std::endl;

    std::vector<float> vec_scores;
    std::vector<float> vec_new_scores;
    std::vector<int> vec_labels;
    int scoresHost_shape_c = scoresHost.channel();
    int scoresHost_shape_d = scoresHost.dimensions();
    int scoresHost_shape_w = scoresHost.width();
    int scoresHost_shape_h = scoresHost.height();
    printf("shape_d=%d shape_c=%d shape_h=%d shape_w=%d scoresHost.elementSize()=%d\n", scoresHost_shape_d,
           scoresHost_shape_c, scoresHost_shape_h, scoresHost_shape_w, scoresHost.elementSize());

    auto SparseInst_masks = mnnNet->getSessionOutput(session, "masks");
    MNN::Tensor masksHost(SparseInst_masks, SparseInst_masks->getDimensionType());
    SparseInst_masks->copyToHostTensor(&masksHost);
    // std::vector<float> vec_host_masks;
    // for (int i = 0; i < masksHost.elementSize(); i++) {
    //     vec_host_masks.emplace_back(masksHost.host<float>()[i]);
    // }
    int masksHost_shape_c = masksHost.channel();
    int masksHost_shape_d = masksHost.dimensions();
    int masksHost_shape_w = masksHost.width();
    int masksHost_shape_h = masksHost.height();
    printf("shape_d=%d shape_c=%d shape_h=%d shape_w=%d masksHost.elementSize()=%d\n", masksHost_shape_d,
           masksHost_shape_c, masksHost_shape_h, masksHost_shape_w, masksHost.elementSize());

    std::vector<std::vector<int>> mask_pred;
    std::vector<std::vector<int>> mask_pred_per_image;
    for (int i = 0; i < scoresHost_shape_c; i++) {
        std::vector<float> item_score;
        std::vector<int> item_mask;
        std::vector<float> mask_pred_dot;
        for (int j = 0; j < scoresHost_shape_h; j++) {
            item_score.emplace_back(scoresHost.host<float>()[i * scoresHost_shape_h + j]);
        }
        float mask_value_0 = 0;
        float mask_value_1 = 0;
        for (int m = 0; m < masksHost_shape_h; m++) {
            for (int n = 0; n < masksHost_shape_w; n++) {
                float mask_value = masksHost.host<float>()[i * masksHost_shape_h * masksHost_shape_w +
                                                           m * masksHost_shape_w + n];
                int value_bl = mask_value > 0.45 ? 1 : 0;
                item_mask.emplace_back(value_bl);
                mask_value_0 = mask_value_0 + mask_value * value_bl;
                mask_value_1 = mask_value_1 + mask_value + 1e-6;
            }
        }
        float value = *max_element(item_score.begin(), item_score.end());
        auto valueIter = max_element(item_score.begin(), item_score.end());
        int index = distance(item_score.begin(), valueIter);
        if (value > cls_threshold) {
            vec_scores.emplace_back(value);
            vec_labels.emplace_back(index);
            mask_pred_per_image.emplace_back(item_mask);  // mask_pred = mask_pred_per_image > 0.45
            vec_new_scores.emplace_back(value * mask_value_0 / mask_value_1);
        }
    }
    if (vec_new_scores.size()) {
        for (int i = 0; i < vec_new_scores.size(); i++) {
            float score = vec_new_scores[i];
            if (score > 0.5) {
                // rows/cols are interchangeable here because the input was resized to a square 640x640
                cv::Mat gray_img = cv::Mat(resize_img.cols, resize_img.rows, CV_8UC3, cv::Scalar(255, 255, 255));
                int labelid = vec_labels[i];
                std::string label = classes[labelid];
                int startx = resize_img.cols;
                int starty = resize_img.rows;
                int endx = -1;
                int endy = -1;
                for (int m = 0; m < resize_img.cols; m++) {
                    uchar *p = resize_img.ptr(m);
                    uchar *q = gray_img.ptr(m);
                    for (int n = 0; n < resize_img.rows; n++) {
                        if (mask_pred_per_image[i][m * resize_img.rows + n]) {
                            startx = std::min(startx, m);
                            starty = std::min(starty, n);
                            endx = std::max(endx, m);
                            endy = std::max(endy, n);
                            p[0] = cv::saturate_cast<uchar>(p[0] * 0.5 + colors[i][0] * 0.5);
                            p[1] = cv::saturate_cast<uchar>(p[1] * 0.5 + colors[i][1] * 0.5);
                            p[2] = cv::saturate_cast<uchar>(p[2] * 0.5 + colors[i][2] * 0.5);
                            q[0] = cv::saturate_cast<uchar>(0);
                            q[1] = cv::saturate_cast<uchar>(0);
                            q[2] = cv::saturate_cast<uchar>(0);
                        }
                        p += 3;
                        q += 3;
                    }
                }
                cvtColor(gray_img, gray_img, cv::COLOR_BGR2GRAY);
                threshold(gray_img, gray_img, 0, 128, cv::THRESH_BINARY_INV);
                // remove isolated specks from the binary mask
                cv::Mat kernel_ = getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(5, 5));
                cv::morphologyEx(gray_img, gray_img, cv::MORPH_CLOSE, kernel_, cv::Point(-1, -1), 2);
                std::vector<std::vector<cv::Point>> contours;
                findContours(gray_img, contours, cv::RETR_TREE, cv::CHAIN_APPROX_SIMPLE);
                // rough drawing of the contour geometry
                vector<vector<Point>> contours_ploy(contours.size()); // approximated polygons
                vector<Rect> ploy_rects(contours.size());             // bounding rectangles
                vector<Point2f> ccs(contours.size());                 // enclosing-circle centers
                vector<float> radius(contours.size());                // enclosing-circle radii
                vector<RotatedRect> minRects(contours.size());
                vector<RotatedRect> myeliipse(contours.size());
                for (size_t t = 0; t < contours.size(); t++) {
                    approxPolyDP(Mat(contours[t]), contours_ploy[t], 3, true);
                    ploy_rects[t] = boundingRect(contours_ploy[t]);
                    minEnclosingCircle(contours_ploy[t], ccs[t], radius[t]);
                    if (contours_ploy[t].size() > 5) {
                        myeliipse[t] = fitEllipse(contours_ploy[t]);
                        minRects[t] = minAreaRect(contours_ploy[t]);
                    }
                }
                cv::Mat drawImg;
                gray_img.copyTo(drawImg);
                RNG rng(1234);
                Point2f pts[4];
                for (size_t t = 0; t < contours.size(); t++) {
                    Scalar color = Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));
                    rectangle(drawImg, ploy_rects[t], color, 2, 8);
                    circle(drawImg, ccs[t], radius[t], color, 2, 8);
                    if (contours_ploy[t].size() > 5) {
                        ellipse(drawImg, myeliipse[t], color, 1, 8);
                        minRects[t].points(pts);
                        for (int r = 0; r < 4; r++) {
                            line(drawImg, pts[r], pts[(r + 1) % 4], color, 2, 8);
                        }
                    }
                }
                imshow("drawImg", drawImg);
                cv::waitKey(0);
                // everything above can be deleted; it only makes the label placement look nicer
                int centerx = ccs[0].x;
                int centery = ccs[0].y;
                char text[256];
                sprintf(text, "%s %.1f%%", label.c_str(), score * 100);
                cv::putText(resize_img, text, cv::Point(centerx, centery),
                            cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(colors[i][0], colors[i][1], colors[i][2]), 1, cv::LINE_AA);
            }
        }
    }
    cv::resize(resize_img, bgr, cv::Size(bgr.cols, bgr.rows)); // use resize in place of the original down/upsampling steps
    cv::imwrite("result.jpg", bgr);
    cv::imshow("image.jpg", bgr);
    cv::waitKey(0);
    mnnNet->releaseModel();
    mnnNet->releaseSession(session);
    return 0;
}
Test result:
The script for converting to an RKNN model is below; dataset.txt is a plain-text list of calibration image paths, one per line (use several images).
from rknn.api import RKNN
ONNX_MODEL = '/home/ubuntu/SparseInst/output/sparseinst_opt12.onnx'
RKNN_MODEL = '/home/ubuntu/SparseInst/output/sparseinst_opt12.rknn'
if __name__ == '__main__':
    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], reorder_channel='0 1 2',
                target_platform='rk3399pro',
                quantized_dtype='asymmetric_affine-u8', batch_size=1, optimization_level=3, output_optimize=1)
    print('done')

    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=True, dataset='dataset.txt')  # , pre_compile=True
    if ret != 0:
        print('Build sparseinst_opt12 failed!')
        exit(ret)
    print('done')

    # Export rknn model
    print('--> Export RKNN model')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export sparseinst_opt12.rknn failed!')
        exit(ret)
    print('done')

    rknn.release()
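One thing worth double-checking (my assumption, not something verified here): the config above normalizes inputs with mean 0 / std 255, while the exported ONNX model, with forward_test's normalization commented out, was fed ImageNet mean/std during export. If the quantization input distribution does not match, accuracy can drop. A variant that mirrors the export preprocessing would look like this:
# hypothetical alternative: reuse the mean/std from convert_onnx.py so the quantized
# model sees the same input statistics as the ONNX export did
rknn.config(mean_values=[[123.675, 116.28, 103.53]],
            std_values=[[58.395, 57.12, 57.375]],
            reorder_channel='0 1 2',
            target_platform='rk3399pro',
            quantized_dtype='asymmetric_affine-u8',
            batch_size=1, optimization_level=3, output_optimize=1)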
The generated model files:
ubuntu@ubuntu:~/SparseInst/output$ ls
sparseinst.onnx sparseinst_sim.onnx sparseinst_sim.bin sparseinst_sim.param sparseinst_sim.bin sparseinst_sim.rknn
sparse_inst_r50_giam sparseinst_sim.mnn
Test code:
import os
import urllib
import traceback
import time
from torch.nn import functional as F
import cv2
from rknn.api import RKNN
import torch
RKNN_MODEL = "sparseinst_opt12.rknn"
IMG_PATH = "img.png"
target_size=640
QUANTIZE_ON = True
def postprocess(src_img, pred_scores, pred_masks):
    pred_masks = torch.as_tensor(pred_masks)
    pred_scores = torch.as_tensor(pred_scores)
    image_sizes = [(target_size, target_size)]
    max_shape = torch.Size([target_size, target_size])
    pred_masks_list, scores_list, pred_classes_list = [], [], []
    for _, (scores_per_image, mask_pred_per_image, img_shape) in enumerate(zip(
            pred_scores, pred_masks, image_sizes)):
        ori_shape = (src_img.shape[0], src_img.shape[1])
        # max/argmax
        scores, labels = scores_per_image.max(dim=-1)
        # cls threshold
        keep = scores > 0.005  # 0.005
        scores = scores[keep]
        labels = labels[keep]
        mask_pred_per_image = mask_pred_per_image[keep]
        if scores.size(0) == 0:
            continue
        h, w = img_shape

        # rescoring mask using maskness
        def rescoring_mask(scores, mask_pred, masks):
            mask_pred_ = mask_pred.float()
            return scores * ((masks * mask_pred_).sum([1, 2]) / (mask_pred_.sum([1, 2]) + 1e-6))

        scores = rescoring_mask(
            scores, mask_pred_per_image > 0.45, mask_pred_per_image)
        # upsample the masks to the original resolution:
        # (1) upsampling the masks to the padded inputs, remove the padding area
        # (2) upsampling/downsampling the masks to the original sizes
        mask_pred_per_image = F.interpolate(
            mask_pred_per_image.unsqueeze(1), size=max_shape, mode="bilinear", align_corners=False)[:, :, :h, :w]
        mask_pred_per_image = F.interpolate(
            mask_pred_per_image, size=ori_shape, mode='bilinear', align_corners=False).squeeze(1)
        mask_pred = mask_pred_per_image > 0.45
        pred_masks_list.append(mask_pred)
        scores_list.append(scores)
        pred_classes_list.append(labels)

    real_score = []
    real_mask = []
    real_class = []
    for mask_item, score_item, classes_item in zip(pred_masks_list, scores_list, pred_classes_list):
        for idx, item in enumerate(score_item.tolist()):
            if item > 0.5:
                real_score.append(item)
                real_mask.append(mask_item.tolist()[idx])
                real_class.append(classes_item.tolist()[idx])
    print(real_score, real_class)

#
# def get_output_shape(oldh, oldw, short_edge_length, max_size):
#     """
#     Compute the output size given input size and target short edge length.
#     """
#     h, w = oldh, oldw
#     size = short_edge_length * 1.0
#     scale = size / min(h, w)
#     if h < w:
#         newh, neww = size, scale * w
#     else:
#         newh, neww = scale * h, size
#     if max(newh, neww) > max_size:
#         scale = max_size * 1.0 / max(newh, neww)
#         newh = newh * scale
#         neww = neww * scale
#     neww = int(neww + 0.5)
#     newh = int(newh + 0.5)
#     return (newh, neww)
if __name__ == "__main__":
    # Create RKNN object
    rknn = RKNN()
    if not os.path.exists(RKNN_MODEL):
        print("model not exist")
        exit(-1)

    # Load RKNN model
    print("--> Loading model")
    ret = rknn.load_rknn(RKNN_MODEL)
    if ret != 0:
        print("Load rknn model failed!")
        exit(ret)
    print("done")

    # init runtime environment
    print("--> Init runtime environment")
    ret = rknn.init_runtime()
    if ret != 0:
        print("Init runtime environment failed")
        exit(ret)
    print("done")

    target = (target_size, target_size)
    # size_divisibility=32
    src_img = cv2.imread(IMG_PATH)
    # oldh=src_img.cols
    # oldw=src_img.rows
    # short_edge_length=32
    # max_size=853
    # max_size=get_output_shape(oldh, oldw, short_edge_length, max_size)
    #
    # if size_divisibility > 1:
    #     stride = size_divisibility
    #     # the last two dims are H,W, both subject to divisibility requirement
    #     max_size = (max_size + (stride - 1)) // stride * stride
    #     borderType = cv2.BORDER_REPLICATE
    #     dst2 = cv2.copyMakeBorder(src, top, bottom, left, right, borderType, None, value)
    res_img = cv2.resize(src_img, target)
    img = cv2.cvtColor(res_img, cv2.COLOR_BGR2RGB)  # hwc rgb

    print("--> Running model")
    start = time.clock()
    score, mask = rknn.inference(inputs=[img])
    # get the end time
    postprocess(src_img, score, mask)
    end = time.clock()
    # compute the elapsed time
    runTime = end - start
    runTime_ms = runTime * 1000
    # print the elapsed time
    print("Runtime:", runTime_ms, "ms")
    rknn.release()
The accuracy of the Python RKNN test is quite low; it may be my own problem and still needs investigation.
/home/ubuntu/miniconda3/envs/rknnpy36/bin/python /home/ubuntu/PycharmProjects/pythonProject2/infer.py
--> Loading model
done
--> Init runtime environment
librknn_runtime version 1.7.3 (5047ff8 build: 2022-08-13 12:11:22 base: 1131)
done
--> Running model
[0.5580335259437561, 0.6662011742591858] [29, 16]
Runtime: 595974.0290000001 ms
Process finished with exit code 0
Verified working; code to be posted.
C++
Verified working; code to be posted.
NCNN test results are still being verified.