基本思想:手中有一个口罩检测的模型和源码,模型种类比较多,有pb类型、hdf5模型、pth模型,正好练习一下pb转mnn模型的检测逻辑书写,所以随手记录一下。
模型链接:https://pan.baidu.com/s/1VKkB7vetF60dJKvXFmctpw
提取码:2g90
测试图片一 不带口罩
测试图片二 戴口罩
一、转换模型到onnx
import torch
# import trochvision
import torch.utils.data
import argparse
import onnxruntime
import os
import cv2
import numpy as np
from torch.autograd import Variable
from onnxruntime.datasets import get_example
import torch
import torch.nn.functional as F
import warnings
from instances.fmssd import FMSSD
from utils.fmssd_utils import pre_process, post_process
# Silence noisy deprecation/user warnings emitted by torch / onnx export.
warnings.filterwarnings('ignore')
# Run on the first GPU when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def pre_process(image, target_shape=(360, 360)):
    """Resize an HWC image, scale pixels to [0, 1], and return an NCHW batch of one."""
    resized = cv2.resize(image, target_shape)
    scaled = resized / 255.0  # normalize pixel values to [0, 1]
    batched = np.expand_dims(scaled, axis=0)  # add batch dimension -> NHWC
    return batched.transpose((0, 3, 1, 2))  # NHWC -> NCHW for the network
def main(args):
    """Run the reference PyTorch detector on one image, then export and verify ONNX."""
    detector = FMSSD()
    image = cv2.imread(args.image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # model expects RGB input
    print(detector.detect_face(image))  # reference detection for comparison
    blob = pre_process(image)
    dummy_input = torch.tensor(blob).float().to(device)
    torch2onnx(args, detector, image, dummy_input)
def torch2onnx(args, fmssd, img, dummy_input):
    """Export the .pth model to ONNX, re-run it with onnxruntime, and post-process.

    The onnxruntime result is decoded with the same post_process/anchors as the
    PyTorch path so the two outputs can be compared directly.
    """
    image = img
    model = torch.load(args.model_path).to(device)
    model.eval()  # inference mode: freeze dropout/batch-norm
    input_names = ["input"]    # name of the model input
    output_names = ["output", "prob"]  # names of the model outputs
    print("====", dummy_input.shape)
    torch.onnx.export(model, dummy_input, args.onnx_model_path, verbose=False,
                      input_names=input_names, output_names=output_names,
                      opset_version=11)
    # Sanity-check the exported ONNX model with onnxruntime.
    example_model = get_example(args.onnx_model_path)
    session = onnxruntime.InferenceSession(example_model)
    input_name = session.get_inputs()[0].name  # first (only) input
    raw = session.run([], {input_name: dummy_input.data.cpu().numpy()})
    results = tuple(torch.tensor(r, dtype=torch.float32) for r in raw[:2])
    print(results, type(results))
    y_bboxes, bbox_max_scores, bbox_max_score_classes, keep_idxs = post_process(results, fmssd.anchors)
    # Standardize output: scale to pixels, clip to the image, keep high scores.
    return_boxes = []
    return_labels = []
    for idx in keep_idxs:
        if float(bbox_max_scores[idx]) < 0.9:
            continue
        score = bbox_max_scores[idx]
        x_min = max(0, int(y_bboxes[idx][0] * image.shape[1]))
        y_min = max(0, int(y_bboxes[idx][1] * image.shape[0]))
        x_max = min(int(y_bboxes[idx][2] * image.shape[1]), image.shape[1])
        y_max = min(int(y_bboxes[idx][3] * image.shape[0]), image.shape[0])
        return_boxes.append([x_min, y_min, x_max, y_max, score])
        return_labels.append(str(bbox_max_score_classes[idx]))
    print(return_boxes, return_labels)
if __name__ == "__main__":
    # CLI entry point; every argument has a default so the script runs as-is.
    parser = argparse.ArgumentParser(
        description="PyTorch model to onnx and ncnn")
    parser.add_argument('--image_path', type=str, default=r"1.png",
                        help="For image from one model_file")
    parser.add_argument('--model_path', type=str, default=r"facemask.pth",
                        help="For training from one model_file")
    parser.add_argument('--save_model_path', type=str, default=r"./",
                        help="For training from one model_file")
    parser.add_argument('--onnx_model_path', type=str, default=r"facemask.onnx",
                        help="For training from one model_file")
    main(parser.parse_args())
转化模型结果测试图一
"C:\Program Files\Python39\python.exe" G:/test.py
T= (tensor([[[ 0.4943, 0.2727, -1.4956, -0.9886],
[ 0.9121, 0.7739, -0.6659, -0.3763],
[ 0.2119, 0.6751, -0.7218, -0.7200],
...,
[-1.3140, -2.4570, -2.3000, -0.8061],
[-1.4819, -2.2536, -1.6414, -1.7638],
[-1.6244, -2.1553, -0.9989, -2.2421]]], device='cuda:0',
grad_fn=<CatBackward0>), tensor([[[5.2112e-03, 1.2535e-02],
[6.5836e-03, 1.1856e-02],
[6.8678e-03, 1.3216e-02],
...,
[3.0647e-05, 1.4859e-05],
[2.4006e-04, 3.4589e-05],
[3.4462e-04, 4.9008e-05]]], device='cuda:0', grad_fn=<CatBackward0>)) type= <class 'tuple'>
([[102, 16, 318, 266, 0.99999964]], ['0'])
image_np= (360, 360, 3)
image_exp= (1, 360, 360, 3)
image_transposed= (1, 3, 360, 360)
==== torch.Size([1, 3, 360, 360])
(tensor([[[ 0.4943, 0.2727, -1.4956, -0.9887],
[ 0.9121, 0.7739, -0.6659, -0.3763],
[ 0.2119, 0.6751, -0.7218, -0.7201],
...,
[-1.3140, -2.4570, -2.3000, -0.8061],
[-1.4819, -2.2536, -1.6414, -1.7638],
[-1.6244, -2.1553, -0.9989, -2.2421]]]), tensor([[[5.2112e-03, 1.2535e-02],
[6.5836e-03, 1.1856e-02],
[6.8678e-03, 1.3216e-02],
...,
[3.0667e-05, 1.4901e-05],
[2.4009e-04, 3.4630e-05],
[3.4460e-04, 4.8965e-05]]])) <class 'tuple'>
[[102, 16, 318, 266, 0.99999964]] ['0']
Process finished with exit code 0
图片二 标签1 是不带口罩
"C:\Program Files\Python39\python.exe" G:/test.py
T= (tensor([[[ 0.4952, 0.6524, -2.1443, -0.8687],
[ 0.6779, 1.1919, -1.1520, 0.0788],
[ 0.2762, 0.9669, -1.3204, -0.6083],
...,
[-3.5005, -3.6263, -0.9773, 0.1236],
[-4.8358, -3.2387, 0.2555, -0.5876],
[-5.7401, -2.8354, 1.0323, -1.3697]]], device='cuda:0',
grad_fn=<CatBackward0>), tensor([[[9.8892e-03, 1.6405e-02],
[8.6434e-03, 1.5386e-02],
[1.2049e-02, 1.6354e-02],
...,
[1.3120e-06, 3.6041e-06],
[1.3309e-07, 2.1818e-07],
[6.3908e-07, 3.0787e-07]]], device='cuda:0', grad_fn=<CatBackward0>)) type= <class 'tuple'>
([[110, 72, 415, 434, 0.99997485]], ['1'])
image_np= (360, 360, 3)
image_exp= (1, 360, 360, 3)
image_transposed= (1, 3, 360, 360)
==== torch.Size([1, 3, 360, 360])
(tensor([[[ 0.4952, 0.6524, -2.1443, -0.8687],
[ 0.6779, 1.1919, -1.1520, 0.0788],
[ 0.2762, 0.9669, -1.3204, -0.6083],
...,
[-3.5005, -3.6263, -0.9773, 0.1236],
[-4.8358, -3.2387, 0.2555, -0.5876],
[-5.7401, -2.8354, 1.0323, -1.3697]]]), tensor([[[9.8891e-03, 1.6405e-02],
[8.6433e-03, 1.5386e-02],
[1.2049e-02, 1.6354e-02],
...,
[1.3411e-06, 3.6359e-06],
[1.7881e-07, 2.0862e-07],
[6.2585e-07, 2.9802e-07]]])) <class 'tuple'>
[[110, 72, 415, 434, 0.9999747]] ['1']
Process finished with exit code 0
二、进行模型简化使用onnxsim
G:\***>python -m onnxsim facemask.onnx facemask_sim.onnx
Simplifying...
Checking 0/3...
Checking 1/3...
Checking 2/3...
Ok!
模型简化图
三、先转ncnn模型,进行测试
D:\ncnn\buildMinGW\install\bin>onnx2ncnn.exe facemask_sim.onnx facemask_sim.param facemask_sim.bin
使用CLion测试一下ncnn推理结果
#include <vector>
#include <ostream>
#include <random>
#include <chrono>
#include <stdio.h>
#include <fstream>
#include <opencv2/opencv.hpp>
#include "net.h"
#include "cpu.h"
using namespace std;
int main()
{
    // Load the test image and the simplified ncnn model.
    cv::Mat img = cv::imread("F:\\temp\\ncnn-main\\ncnn-main\\1.png");
    ncnn::Net facemask;
    facemask.load_param("../facemask_sim.param");
    facemask.load_model("../facemask_sim.bin");
    // Resize to the 360x360 network input, converting BGR -> RGB on the fly.
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(img.data, ncnn::Mat::PIXEL_BGR2RGB, img.cols, img.rows, 360, 360);
    fprintf(stderr, "input shape: %d %d %d %d\n", in.dims, in.h, in.w, in.c);
    // Normalize pixels to [0, 1]: zero mean, 1/255 scale.
    const float mean_vals[3] = {0.0f, 0.0f, 0.0f};
    const float norm_vals[3] = {1/255.0f, 1/255.0f, 1/255.0f};
    in.substract_mean_normalize(mean_vals, norm_vals);
    // Run the net and extract both output blobs by name.
    ncnn::Extractor ex = facemask.create_extractor();
    ex.input("input", in);
    ncnn::Mat out, prob;
    ex.extract("output", out);
    ex.extract("prob", prob);
    fprintf(stderr, "output shape: %d %d %d %d\n", out.dims, out.h, out.w, out.c);
    fprintf(stderr, "prob shape: %d %d %d %d\n", prob.dims, prob.h, prob.w, prob.c);
    // Dump every raw value so the numbers can be compared with pth/onnx output.
    const int out_count = out.w * out.h * out.c;
    for (int idx = 0; idx < out_count; idx++)
    {
        printf("%f ", out[idx]);
    }
    printf("\n");
    const int prob_count = prob.w * prob.h * prob.c;
    for (int idx = 0; idx < prob_count; idx++)
    {
        printf("%f ", prob[idx]);
    }
    std::cout<<"hello world"<<std::endl;
    return 0;
}
测试结果图片一
0.494328 0.272666 -1.495628 -0.988653 0.912108 0.773866 -0.665871 -0.376344 0.211919 0.675081 -0.721774 -0.720052 -0.124..............-0.933028 -1.313996 -2.4572
12 -2.299649 -0.805958 -1.482189 -2.253740 -1.641062 -1.763424 -1.624771 -2.155395 -0.998626 -2.241691
0.005211 0.012535 0.006584 0.011856 0.006868 0.013216 0.007772 0.014064 0.000630 0.006766 0.002027 0.004754 0.000727 0.0
...................0.000001 0.000007 0.000001 0.000003 0.000361 0.000036 0.000031 0.000015 0.000240 0.000035 0.000345 0.000049
图片二
0.495157 0.652395 -2.144296 -0.868656 0.677928 1.191849 -1.151949 0.078795 0.276209 0.966920 -1.320431 -0.608290 -0.0484.....-3.869228 1.789988 -1.868629 -3.800155 -3.883595 -0.685847 0.580149 -3.500278 -3.625916 -0.976630 0.123151 -4.8351
08 -3.238407 0.255787 -0.588255 -5.738889 -2.835253 1.032396 -1.369883
0.009889 0.016405 0.008643 0.015386 0.012049 0.016354 0.012119 0.016875 0.004379 0.018514 0.004061 0.011692 0.004673 0.0
16652 0.004294 0.015374 0.003984 0.009661 0.002261 0.000720 0.000000 0.000013 0.0000
01 0.000110 0.000000 0.000183 0.000000 0.003125 0.000000 0.000001 0.000001 0.000004 0.000000 0.000000 0.000001 0.000000
hello
ncnn测试结果和pth、onnx的对比结果一样的
四、整个ncnn的测试代码
#include <vector>
#include <ostream>
#include <random>
#include <chrono>
#include <stdio.h>
#include <fstream>
#include <opencv2/opencv.hpp>
#include "net.h"
#include "cpu.h"
// Axis-aligned box: (x1, y1) top-left, (x2, y2) bottom-right; zero-initialized.
struct Rect {
    float x1 = 0;
    float y1 = 0;
    float x2 = 0;
    float y2 = 0;
    Rect() = default;
    Rect(float x1_, float y1_, float x2_, float y2_) : x1(x1_), y1(y1_), x2(x2_), y2(y2_) {}
};
// One detection: original anchor index, confidence score, and its box.
struct BBox {
    int idx = 0;
    float conf = 0;
    Rect rect;
    BBox() = default;
    BBox(int idx_, float conf_, Rect rect_) : idx(idx_), conf(conf_), rect(rect_) {}
};
using namespace std;
// Build SSD-style prior (anchor) boxes for every cell of every feature map.
// feature_map_sizes[i] = {cells_x, cells_y} of map i; each cell gets
// |anchor_sizes[i]| + |anchor_ratios[i]| - 1 anchors (all sizes at ratio[0],
// plus size[0] at each additional ratio). Appends one normalized corner-form
// Rect per anchor to anchor_bboxes and returns the total box count.
int generate_anchors( vector<vector<int>> feature_map_sizes , vector<vector<float>> anchor_sizes , vector<vector<float>> anchor_ratios, vector<Rect> &anchor_bboxes)
{
    for(int i=0;i<feature_map_sizes.size();i++)
    {
        vector<float> vec_cx;
        vector<float> vec_cy;
        vector<vector<float>> center_tiled;
        // Cell-center coordinates normalized to [0, 1] on each axis.
        // Bug fix: cy must iterate over the second dimension; the original
        // used feature_map_sizes[i][0] for both (only correct for square maps).
        for(int j=0;j<feature_map_sizes[i][0];j++)
        {
            vec_cx.push_back((j+0.5)/(float)feature_map_sizes[i][0]);
        }
        for(int j=0;j<feature_map_sizes[i][1];j++)
        {
            vec_cy.push_back((j+0.5)/(float)feature_map_sizes[i][1]);
        }
        int num_anchors=anchor_sizes[i].size()+anchor_ratios[i].size()-1;
        // Tile each cell center 2*num_anchors times so the per-anchor corner
        // offsets below can be added element-wise.
        for(int m=0;m<vec_cy.size();m++){
            for(int n=0;n<vec_cx.size();n++)
            {
                vector<float> center_tiled_item;
                for(int p=0;p<2*num_anchors;p++){
                    center_tiled_item.push_back(vec_cx[n]);
                    center_tiled_item.push_back(vec_cy[m]);
                }
                center_tiled.push_back(center_tiled_item);
            }
        }
        // Corner offsets (-w/2, -h/2, +w/2, +h/2) for each anchor shape:
        // every size at the first ratio ...
        vector<float> anchor_width_heights;
        for(int j=0;j<anchor_sizes[i].size();j++) {
            float scale = anchor_sizes[i][j];
            float ratio = anchor_ratios[i][0];
            float width = scale * sqrt(ratio);
            float height = scale / sqrt(ratio);
            anchor_width_heights.push_back(-width / 2.0);
            anchor_width_heights.push_back(-height / 2.0);
            anchor_width_heights.push_back(width / 2.0);
            anchor_width_heights.push_back(height / 2.0);
        }
        // ... plus the first size at every remaining ratio.
        for(int j=1;j<anchor_ratios[i].size();j++) {
            float s1 = anchor_sizes[i][0];
            float width = s1 * sqrt(anchor_ratios[i][j]);
            float height = s1 / sqrt(anchor_ratios[i][j]);
            anchor_width_heights.push_back(-width / 2.0);
            anchor_width_heights.push_back(-height / 2.0);
            anchor_width_heights.push_back(width / 2.0);
            anchor_width_heights.push_back(height / 2.0);
        }
        // Center + offset -> absolute (x1, y1, x2, y2) for every anchor.
        for(int x=0;x<center_tiled.size();x++)
        {
            for(int y=0;y< center_tiled[x].size();y+=4)
            {
                center_tiled[x][y] =center_tiled[x][y]+anchor_width_heights[y];
                center_tiled[x][y+1] =center_tiled[x][y+1]+anchor_width_heights[y+1];
                center_tiled[x][y+2] =center_tiled[x][y+2]+anchor_width_heights[y+2];
                center_tiled[x][y+3] =center_tiled[x][y+3]+anchor_width_heights[y+3];
                anchor_bboxes.push_back(Rect(center_tiled[x][y],center_tiled[x][y+1],center_tiled[x][y+2],center_tiled[x][y+3]));
            }
        }
    }
    // Bug fix: the function is declared int but had no return statement
    // (flowing off the end of a value-returning function is UB in C++).
    return (int)anchor_bboxes.size();
}
// Convert raw SSD regression rows (dx, dy, dw, dh) back into corner-form
// boxes, using each anchor as the reference and `variances` as the scales.
void decode_bbox(vector<Rect> anchors,vector<vector<float>> raw_outputs,vector<float> variances,vector<Rect> &predict_bbox){
    for(int i=0;i<anchors.size();i++)
    {
        // Anchor in center/size form.
        float cx = (anchors[i].x1 + anchors[i].x2) / 2;
        float cy = (anchors[i].y1 + anchors[i].y2) / 2;
        float w = anchors[i].x2 - anchors[i].x1;
        float h = anchors[i].y2 - anchors[i].y1;
        // Undo the variance scaling applied during training.
        float dx = raw_outputs[i][0] * variances[0];
        float dy = raw_outputs[i][1] * variances[1];
        float dw = raw_outputs[i][2] * variances[2];
        float dh = raw_outputs[i][3] * variances[3];
        // Shift the center, exponentiate the size deltas, back to corner form.
        float pred_cx = dx * w + cx;
        float pred_cy = dy * h + cy;
        float pred_w = exp(dw) * w;
        float pred_h = exp(dh) * h;
        predict_bbox.push_back(Rect(pred_cx - pred_w / 2,
                                    pred_cy - pred_h / 2,
                                    pred_cx + pred_w / 2,
                                    pred_cy + pred_h / 2));
    }
}
// Intersection-over-union of two corner-form boxes.
float iou(Rect& r1, Rect& r2) {
    float ix1 = max(r1.x1, r2.x1);
    float iy1 = max(r1.y1, r2.y1);
    float ix2 = min(r1.x2, r2.x2);
    float iy2 = min(r1.y2, r2.y2);
    // Clamp to zero so disjoint boxes give zero overlap.
    float iw = max(0.0f, ix2 - ix1);
    float ih = max(0.0f, iy2 - iy1);
    float inter_area = ih * iw;
    float area1 = (r1.x2 - r1.x1) * (r1.y2 - r1.y1);
    float area2 = (r2.x2 - r2.x1) * (r2.y2 - r2.y1);
    float union_area = area1 + area2 - inter_area;
    return inter_area / union_area;
}
// Greedy single-class non-maximum suppression (NMS).
// rects/confs are parallel arrays of candidate boxes and scores.
// Outputs: keep_idx = original indices of surviving boxes (score-descending);
// ans = the surviving boxes themselves, in the same order.
// conf_thresh: minimum score to keep; iou_thresh: overlap above which a
// lower-scoring box is suppressed.
void single_class_non_max_suppression(vector<Rect> rects, vector<float> confs,vector<Rect> &ans, vector<int> &keep_idx, float conf_thresh = 0.5, float iou_thresh = 0.5) {
    if (rects.size()==0)
    {
        return;
    }
    vector<BBox> bboxes;
    BBox bbox;
    for (int i = 0; i < (int)rects.size(); ++i) {
        bboxes.push_back(BBox(i, confs[i], rects[i]));
    }
    // Sort candidates by confidence, descending.
    sort(bboxes.begin(), bboxes.end(), [&](const BBox& a, const BBox& b) {
        return a.conf > b.conf;
    });
    while (!bboxes.empty()) {
        bbox = bboxes[0];
        if (bbox.conf < conf_thresh) {
            break;  // sorted order: everything remaining is below threshold
        }
        keep_idx.emplace_back(bbox.idx);
        bboxes.erase(bboxes.begin());
        // Compare the highest-confidence box against every remaining box and
        // erase the ones that overlap it too much.
        int size = bboxes.size();
        for (int i = 0; i < size; ++i) {
            float iou_ans = iou(bbox.rect, bboxes[i].rect);
            if (iou_ans > iou_thresh) {
                bboxes.erase(bboxes.begin() + i);
                size = bboxes.size();
                i = i - 1;  // re-test the element that shifted into slot i
            }
        }
    }
    // Emit surviving boxes in kept (score-descending) order.
    for (const int number : keep_idx) {
        ans.push_back(rects[number]);
    }
}
// Scale the NMS-surviving boxes to pixel coordinates, clip them to the image,
// and emit (box, label) pairs for every detection scoring at least conf_thresh.
// keep_idxs: indices produced by single_class_non_max_suppression.
// conf_thresh is a new trailing parameter (default 0.9 preserves the old
// hard-coded threshold) for consistency with the parameterized variant of
// this function used by the MNN pipeline.
void post_process_data(vector<Rect> y_bboxes,
                       vector<float> bbox_max_scores,
                       vector<int> bbox_max_score_classes,
                       vector<int> keep_idxs,
                       int img_width,
                       int img_height,
                       vector<BBox>& return_boxes,
                       vector<int>& return_labels,
                       float conf_thresh = 0.9f){
    for(int i=0;i<keep_idxs.size();i++) {
        int idx=keep_idxs[i];
        if(bbox_max_scores[idx]>=conf_thresh){
            float score = bbox_max_scores[idx];
            // Clip the coordinates so they never exceed the image boundary.
            float x_min = max(0, int(y_bboxes[idx].x1 * img_width));
            float y_min = max(0, int(y_bboxes[idx].y1 * img_height));
            float x_max = min(int(y_bboxes[idx].x2 * img_width), img_width);
            float y_max = min(int(y_bboxes[idx].y2 * img_height), img_height);
            return_boxes.push_back(BBox(idx,score,Rect(x_min, y_min, x_max, y_max)));
            return_labels.push_back(bbox_max_score_classes[idx]);
        }
    }
}
int main()
{
    // Test image and model paths are hard-coded for this experiment.
    cv::Mat img = cv::imread("F:\\temp\\ncnn-main\\ncnn-main\\1.png");
    ncnn::Net facemask;
    int target_size=360;
    float conf_thresh=0.5;
    float iou_thresh=0.5;
    int img_width= img.cols;
    int img_height=img.rows;
    facemask.load_param("../facemask360_sim.param");
    facemask.load_model("../facemask360_sim.bin");
    // Resize to the 360x360 network input, converting BGR -> RGB on the fly.
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(img.data, ncnn::Mat::PIXEL_BGR2RGB, img_width, img_height,target_size,target_size);
    fprintf(stderr, "input shape: %d %d %d %d\n", in.dims, in.h, in.w, in.c);
    // Normalize pixels to [0, 1]: zero mean, 1/255 scale.
    const float mean_vals[3] = {0.0f, 0.0f, 0.0f};
    const float norm_vals[3] = {1/255.0f, 1/255.0f, 1/255.0f};
    in.substract_mean_normalize(mean_vals, norm_vals);
    ncnn::Extractor ex = facemask.create_extractor();
    ex.input("input", in);
    ncnn::Mat out, prob;
    ex.extract("output", out);   // box regression: rows of (dx, dy, dw, dh)
    ex.extract("prob", prob);    // class scores: rows of 2 class probabilities
    fprintf(stderr, "output shape: %d %d %d %d\n", out.dims, out.h, out.w, out.c);
    fprintf(stderr, "prob shape: %d %d %d %d\n", prob.dims, prob.h, prob.w, prob.c);
    // Anchor configuration of the 360x360 model: 5 feature maps with matching
    // per-map anchor sizes and aspect ratios.
    vector<vector<int>> feature_map_sizes= {{45, 45},{ 23, 23}, { 12, 12}, { 6, 6}, { 4, 4}};
    vector<vector<float>> anchor_sizes= {{0.04, 0.056}, {0.08, 0.11}, {0.16, 0.22}, {0.32, 0.45}, {0.64, 0.72}};
    vector<vector<float>> anchor_ratios = {{1, 0.62, 0.42},{1, 0.62, 0.42},{1, 0.62, 0.42},{1, 0.62, 0.42},{1, 0.62, 0.42}};
    vector<float> variances= {0.1, 0.1, 0.2, 0.2};
    vector<Rect> anchor_bboxes;
    // Keep the post-processing simple and free of ncnn helpers, because the
    // same code is reused for the MNN pipeline.
    generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios,anchor_bboxes);
    vector<vector<float>> y_bboxes_output;
    vector<vector<float>> y_cls_output;
    vector<Rect> y_bboxes;
    vector<float> item;
    // Repack the flat "output" blob into rows of out.w (= 4) floats each.
    for (int i=0; i<out.c*out.h*out.w; i+=out.w)
    {
        for(int j=0;j<out.w;j++)
        {
            item.push_back(out[i+j]);
        }
        y_bboxes_output.push_back(item);
        item.clear();
        vector<float>().swap(item);  // release capacity between rows
    }
    // Repack the flat "prob" blob into rows of prob.w (= 2) floats each.
    for (int i=0;i<prob.c*prob.h*prob.w; i+=prob.w)
    {
        for(int j=0;j<prob.w;j++)
        {
            item.push_back(prob[i+j]);
        }
        y_cls_output.push_back(item);
        item.clear();
        vector<float>().swap(item);
    }
    fprintf(stderr, "anchor_bboxes.size()= %d y_bboxes_output.size()= %d\n", anchor_bboxes.size(), y_bboxes_output.size());
    fprintf(stderr, "y_bboxes.size()= %d \n", y_bboxes.size());
    // Decode regression rows against the anchors into absolute boxes.
    decode_bbox(anchor_bboxes, y_bboxes_output,variances,y_bboxes);
    // To speed up, do single class NMS, not multiple classes NMS.
    vector<float> bbox_max_scores;
    vector<int> bbox_max_score_classes;
    // For every anchor keep only its best class and that class's score.
    for(int i=0;i<y_cls_output.size();i++)
    {
        auto maxPosition = max_element(y_cls_output[i].begin(), y_cls_output[i].end());
        bbox_max_scores.push_back(*maxPosition);
        int index=maxPosition - y_cls_output[i].begin();
        bbox_max_score_classes.push_back(index);
    }
    vector<Rect> ans;
    vector<int> keep_idxs;
    single_class_non_max_suppression(y_bboxes, bbox_max_scores,ans,keep_idxs,conf_thresh,iou_thresh);
    // Standardize output
    vector<BBox> return_boxes;
    vector<int> return_labels;
    post_process_data(y_bboxes, bbox_max_scores, bbox_max_score_classes, keep_idxs,img_width,img_height,return_boxes,return_labels);
    // Print detections in the same [[x1, y1, x2, y2, conf]][['label']] format
    // as the Python reference script.
    for(int i=0;i<return_boxes.size();i++)
    {
        std::cout<<"[["<<return_boxes[i].rect.x1<<
        ", "<<return_boxes[i].rect.y1<<
        ", "<<return_boxes[i].rect.x2<<
        ", "<<return_boxes[i].rect.y2<<
        ", "<<return_boxes[i].conf<<"]]";
        std::cout<<"[['"<<return_labels[i]<<"']]"<<std::endl;
    }
    return 0;
}
测试结果图片一
F:\temp\ncnn-main\ncnn-main\cmake-build-debug\test.exe
input shape: 3 360 360 3
output shape: 2 11000 4 1
prob shape: 2 11000 2 1
anchor_bboxes.size()= 11000
y_bboxes_output.size()= 11000
y_bboxes.size()= 0
[[102, 16, 318, 266, 1]][['0']]
测试结果图片二
F:\temp\ncnn-main\ncnn-main\cmake-build-debug\test.exe
input shape: 3 360 360 3
output shape: 2 11000 4 1
prob shape: 2 11000 2 1
anchor_bboxes.size()= 11000
y_bboxes_output.size()= 11000
y_bboxes.size()= 0
[[110, 72, 415, 434, 0.999975]][['1']]
Process finished with exit code 0
五、MNN使用 MinGW编译
D:\MNN\buildMinGW>MNNConvert.exe -f TF --modelFile G:\facemask.pb --MNNModel facemask_from_pb.mnn --bizCode biz
Start to Convert Other Model Format To MNN Model...
Start to Optimize the MNN Net...
inputTensors : [ data_1, ]
outputTensors: [ cls_branch_concat_1/concat, loc_branch_concat_1/concat, ]
Converted Success!
D:\MNN\buildMinGW>MNNConvert -f ONNX --modelFile G:\facemask360_sim.onnx --MNNModel facemask360_sim_from_onnx.mnn --bizCode MNN
Start to Convert Other Model Format To MNN Model...
[11:47:42] D:\MNN\tools\converter\source\onnx\onnxConverter.cpp:30: ONNX Model ir version: 7
Start to Optimize the MNN Net...
inputTensors : [ input, ]
outputTensors: [ output, prob, ]
Converted Success!
模型一的代码,因为使用的pb模型转mnn,维度是260,类型是TensorFlow
测试代码
#include <iostream>
#include<opencv2/core.hpp>
#include<opencv2/imgproc.hpp>
#include<opencv2/highgui.hpp>
#include<MNN/Interpreter.hpp>
#include<MNN/ImageProcess.hpp>
using namespace std;
using namespace MNN;
// Axis-aligned box: (x1, y1) top-left, (x2, y2) bottom-right; zero-initialized.
struct Rect {
    float x1 = 0;
    float y1 = 0;
    float x2 = 0;
    float y2 = 0;
    Rect() = default;
    Rect(float x1_, float y1_, float x2_, float y2_) : x1(x1_), y1(y1_), x2(x2_), y2(y2_) {}
};
// One detection: original anchor index, confidence score, and its box.
struct BBox {
    int idx = 0;
    float conf = 0;
    Rect rect;
    BBox() = default;
    BBox(int idx_, float conf_, Rect rect_) : idx(idx_), conf(conf_), rect(rect_) {}
};
using namespace std;
// Build SSD-style prior (anchor) boxes for every cell of every feature map.
// feature_map_sizes[i] = {cells_x, cells_y} of map i; each cell gets
// |anchor_sizes[i]| + |anchor_ratios[i]| - 1 anchors (all sizes at ratio[0],
// plus size[0] at each additional ratio). Appends one normalized corner-form
// Rect per anchor to anchor_bboxes and returns the total box count.
int generate_anchors( vector<vector<int>> feature_map_sizes , vector<vector<float>> anchor_sizes , vector<vector<float>> anchor_ratios, vector<Rect> &anchor_bboxes)
{
    for(int i=0;i<feature_map_sizes.size();i++)
    {
        vector<float> vec_cx;
        vector<float> vec_cy;
        vector<vector<float>> center_tiled;
        // Cell-center coordinates normalized to [0, 1] on each axis.
        // Bug fix: cy must iterate over the second dimension; the original
        // used feature_map_sizes[i][0] for both (only correct for square maps).
        for(int j=0;j<feature_map_sizes[i][0];j++)
        {
            vec_cx.push_back((j+0.5)/(float)feature_map_sizes[i][0]);
        }
        for(int j=0;j<feature_map_sizes[i][1];j++)
        {
            vec_cy.push_back((j+0.5)/(float)feature_map_sizes[i][1]);
        }
        int num_anchors=anchor_sizes[i].size()+anchor_ratios[i].size()-1;
        // Tile each cell center 2*num_anchors times so the per-anchor corner
        // offsets below can be added element-wise.
        for(int m=0;m<vec_cy.size();m++){
            for(int n=0;n<vec_cx.size();n++)
            {
                vector<float> center_tiled_item;
                for(int p=0;p<2*num_anchors;p++){
                    center_tiled_item.push_back(vec_cx[n]);
                    center_tiled_item.push_back(vec_cy[m]);
                }
                center_tiled.push_back(center_tiled_item);
            }
        }
        // Corner offsets (-w/2, -h/2, +w/2, +h/2) for each anchor shape:
        // every size at the first ratio ...
        vector<float> anchor_width_heights;
        for(int j=0;j<anchor_sizes[i].size();j++) {
            float scale = anchor_sizes[i][j];
            float ratio = anchor_ratios[i][0];
            float width = scale * sqrt(ratio);
            float height = scale / sqrt(ratio);
            anchor_width_heights.push_back(-width / 2.0);
            anchor_width_heights.push_back(-height / 2.0);
            anchor_width_heights.push_back(width / 2.0);
            anchor_width_heights.push_back(height / 2.0);
        }
        // ... plus the first size at every remaining ratio.
        for(int j=1;j<anchor_ratios[i].size();j++) {
            float s1 = anchor_sizes[i][0];
            float width = s1 * sqrt(anchor_ratios[i][j]);
            float height = s1 / sqrt(anchor_ratios[i][j]);
            anchor_width_heights.push_back(-width / 2.0);
            anchor_width_heights.push_back(-height / 2.0);
            anchor_width_heights.push_back(width / 2.0);
            anchor_width_heights.push_back(height / 2.0);
        }
        // Center + offset -> absolute (x1, y1, x2, y2) for every anchor.
        for(int x=0;x<center_tiled.size();x++)
        {
            for(int y=0;y< center_tiled[x].size();y+=4)
            {
                center_tiled[x][y] =center_tiled[x][y]+anchor_width_heights[y];
                center_tiled[x][y+1] =center_tiled[x][y+1]+anchor_width_heights[y+1];
                center_tiled[x][y+2] =center_tiled[x][y+2]+anchor_width_heights[y+2];
                center_tiled[x][y+3] =center_tiled[x][y+3]+anchor_width_heights[y+3];
                anchor_bboxes.push_back(Rect(center_tiled[x][y],center_tiled[x][y+1],center_tiled[x][y+2],center_tiled[x][y+3]));
            }
        }
    }
    // Bug fix: the function is declared int but had no return statement
    // (flowing off the end of a value-returning function is UB in C++).
    return (int)anchor_bboxes.size();
}
// Convert raw SSD regression rows (dx, dy, dw, dh) back into corner-form
// boxes, using each anchor as the reference and `variances` as the scales.
void decode_bbox(vector<Rect> anchors,vector<vector<float>> raw_outputs,vector<float> variances,vector<Rect> &predict_bbox){
    for(int i=0;i<anchors.size();i++)
    {
        // Anchor in center/size form.
        float cx = (anchors[i].x1 + anchors[i].x2) / 2;
        float cy = (anchors[i].y1 + anchors[i].y2) / 2;
        float w = anchors[i].x2 - anchors[i].x1;
        float h = anchors[i].y2 - anchors[i].y1;
        // Undo the variance scaling applied during training.
        float dx = raw_outputs[i][0] * variances[0];
        float dy = raw_outputs[i][1] * variances[1];
        float dw = raw_outputs[i][2] * variances[2];
        float dh = raw_outputs[i][3] * variances[3];
        // Shift the center, exponentiate the size deltas, back to corner form.
        float pred_cx = dx * w + cx;
        float pred_cy = dy * h + cy;
        float pred_w = exp(dw) * w;
        float pred_h = exp(dh) * h;
        predict_bbox.push_back(Rect(pred_cx - pred_w / 2,
                                    pred_cy - pred_h / 2,
                                    pred_cx + pred_w / 2,
                                    pred_cy + pred_h / 2));
    }
}
// Intersection-over-union of two corner-form boxes.
float iou(Rect& r1, Rect& r2) {
    float ix1 = max(r1.x1, r2.x1);
    float iy1 = max(r1.y1, r2.y1);
    float ix2 = min(r1.x2, r2.x2);
    float iy2 = min(r1.y2, r2.y2);
    // Clamp to zero so disjoint boxes give zero overlap.
    float iw = max(0.0f, ix2 - ix1);
    float ih = max(0.0f, iy2 - iy1);
    float inter_area = ih * iw;
    float area1 = (r1.x2 - r1.x1) * (r1.y2 - r1.y1);
    float area2 = (r2.x2 - r2.x1) * (r2.y2 - r2.y1);
    float union_area = area1 + area2 - inter_area;
    return inter_area / union_area;
}
// Greedy single-class non-maximum suppression (NMS).
// rects/confs are parallel arrays of candidate boxes and scores.
// Outputs: keep_idx = original indices of surviving boxes (score-descending);
// ans = the surviving boxes themselves, in the same order.
// conf_thresh: minimum score to keep; iou_thresh: overlap above which a
// lower-scoring box is suppressed.
void single_class_non_max_suppression(vector<Rect> rects, vector<float> confs,vector<Rect> &ans, vector<int> &keep_idx, float conf_thresh = 0.5, float iou_thresh = 0.5) {
    if (rects.size()==0)
    {
        return;
    }
    vector<BBox> bboxes;
    BBox bbox;
    for (int i = 0; i < (int)rects.size(); ++i) {
        bboxes.push_back(BBox(i, confs[i], rects[i]));
    }
    // Sort candidates by confidence, descending.
    sort(bboxes.begin(), bboxes.end(), [&](const BBox& a, const BBox& b) {
        return a.conf > b.conf;
    });
    while (!bboxes.empty()) {
        bbox = bboxes[0];
        if (bbox.conf < conf_thresh) {
            break;  // sorted order: everything remaining is below threshold
        }
        keep_idx.emplace_back(bbox.idx);
        bboxes.erase(bboxes.begin());
        // Compare the highest-confidence box against every remaining box and
        // erase the ones that overlap it too much.
        int size = bboxes.size();
        for (int i = 0; i < size; ++i) {
            float iou_ans = iou(bbox.rect, bboxes[i].rect);
            if (iou_ans > iou_thresh) {
                bboxes.erase(bboxes.begin() + i);
                size = bboxes.size();
                i = i - 1;  // re-test the element that shifted into slot i
            }
        }
    }
    // Emit surviving boxes in kept (score-descending) order.
    for (const int number : keep_idx) {
        ans.push_back(rects[number]);
    }
}
// Scale the NMS-surviving boxes to pixel coordinates, clip them to the image,
// and emit (box, label) pairs for detections scoring at least conf_thresh.
void post_process_data(vector<Rect> y_bboxes,
                       vector<float> bbox_max_scores,
                       vector<int> bbox_max_score_classes,
                       vector<int> keep_idxs,
                       int img_width,
                       int img_height,
                       float conf_thresh,
                       vector<BBox>& return_boxes,
                       vector<int>& return_labels){
    for (size_t k = 0; k < keep_idxs.size(); k++) {
        int idx = keep_idxs[k];
        if (bbox_max_scores[idx] < conf_thresh) {
            continue;  // below the reporting threshold
        }
        float score = bbox_max_scores[idx];
        // Clip the coordinates so they never exceed the image boundary.
        float x_min = max(0, int(y_bboxes[idx].x1 * img_width));
        float y_min = max(0, int(y_bboxes[idx].y1 * img_height));
        float x_max = min(int(y_bboxes[idx].x2 * img_width), img_width);
        float y_max = min(int(y_bboxes[idx].y2 * img_height), img_height);
        return_boxes.push_back(BBox(idx, score, Rect(x_min, y_min, x_max, y_max)));
        return_labels.push_back(bbox_max_score_classes[idx]);
    }
}
int main() {
//cv::INTER_LINEAR为双线性插值,需要和python下的resize插值方式一致
cv::Mat img = cv::imread("F:\\untitled11\\1.png");
int target_size=260;
float conf_thresh=0.5;
float iou_thresh=0.5;
int img_width= img.cols;
int img_height=img.rows;
std::vector<float> meanVals ={ 0,0,0};;
std::vector<float> normVals= { 1.0f / 255.f,1.0f / 255.f,1.0f / 255.f};
cv::Mat img_resized;
cv::resize(img.clone(), img_resized, cv::Size(target_size, target_size));
cv::Mat img_color;
cv::cvtColor(img_resized, img_color, cv::COLOR_BGR2RGB);
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile("F:\\window10\\facemask_from_pb.mnn"));//创建解释器
cout << "Interpreter created" << endl;
ScheduleConfig config;
config.numThread = 8;
config.type = MNN_FORWARD_CPU;
auto session = net->createSession(config);//创建session
cout << "session created" << endl;
auto inTensor = net->getSessionInput(session, NULL);
auto outTensor = net->getSessionInput(session, NULL);
auto _Tensor = MNN::Tensor::create<float>({1,target_size,target_size,3}, NULL, MNN::Tensor::TENSORFLOW);
if(_Tensor->elementSize()!=3*target_size*target_size)
{
std::cout<<_Tensor->elementSize()<<" "<<img_color.channels()*img_color.cols*img_color.rows<<std::endl;
std::cout<<"input shape not equal image shape"<<std::endl;
return -1;
}
std::vector<cv::Mat> rgbChannels(3);
cv::split(img_color, rgbChannels);
for (auto i = 0; i < rgbChannels.size(); i++) {
rgbChannels[i].convertTo(rgbChannels[i], CV_32FC1, normVals[i], meanVals[i]);
for(int j=0;j<rgbChannels[i].rows;j++) {
for (int k = 0; k < rgbChannels[i].cols; k++) {
_Tensor->host<float>()[i*target_size*target_size+j*target_size+k] =rgbChannels[i].at<float>(j, k);
}
}
}
inTensor->copyFromHostTensor(_Tensor);
//推理
net->runSession(session);
auto output= net->getSessionOutput(session, "loc_branch_concat_1/concat");
//MNN::Tensor feat_tensor(output, output->getDimensionType());
//output->copyToHostTensor(&feat_tensor);
//feat_tensor.print();
MNN::Tensor output_host(output, output->getDimensionType());
output->copyToHostTensor(&output_host);
vector<vector<int>> feature_map_sizes= {{33, 33},{ 17, 17}, { 9, 9}, { 5, 5}, { 3, 3}};
vector<vector<float>> anchor_sizes= {{0.04, 0.056}, {0.08, 0.11}, {0.16, 0.22}, {0.32, 0.45}, {0.64, 0.72}};
vector<vector<float>> anchor_ratios = {{1, 0.62, 0.42},{1, 0.62, 0.42},{1, 0.62, 0.42},{1, 0.62, 0.42},{1, 0.62, 0.42}};
vector<float> variances= {0.1, 0.1, 0.2, 0.2};
vector<Rect> anchor_bboxes;
//尽量写的简单和不依赖ncnn函数 因为mnn还需要使用
generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios,anchor_bboxes);
vector<vector<float>> y_bboxes_output;
vector<vector<float>> y_cls_output;
vector<Rect> y_bboxes;
vector<float> item;
std::cout<<output_host.height()<<" "<<output_host.width()<<" "<<output_host.channel()<<" "<<output_host.elementSize()<<std::endl;
auto output_ptr = output_host.host<float>();
for (int i = 0; i < output_host.elementSize(); i+=output_host.width()) {
float output0 = output_ptr[i];
float output1 = output_ptr[i+1];
float output2 = output_ptr[i+2];
float output3 = output_ptr[i+3];
item.push_back(output0);
item.push_back(output1);
item.push_back(output2);
item.push_back(output3);
y_bboxes_output.push_back(item);
item.clear();
vector<float>().swap(item);
}
output= net->getSessionOutput(session, "cls_branch_concat_1/concat");
MNN::Tensor score_host(output, output->getDimensionType());
output->copyToHostTensor(&score_host);
auto score_ptr = score_host.host<float>();
std::cout<<score_host.height()<<" "<<score_host.width()<<" "<<score_host.channel()<<" "<<score_host.elementSize()<<std::endl;
for (int i = 0; i < score_host.elementSize(); i+=score_host.width()) {
float score0 = score_ptr[i];
float score1 = score_ptr[i+1];
item.push_back(score0);
item.push_back(score1);
y_cls_output.push_back(item);
item.clear();
vector<float>().swap(item);
}
std::cout<<anchor_bboxes.size()<<" "<<y_bboxes_output.size()<<" "<<variances.size()<<" "<<y_bboxes.size()<<std::endl;
decode_bbox(anchor_bboxes, y_bboxes_output,variances,y_bboxes);
// To speed up, do single class NMS, not multiple classes NMS.
vector<float> bbox_max_scores;
vector<int> bbox_max_score_classes;
for(int i=0;i<y_cls_output.size();i++)
{
auto maxPosition = max_element(y_cls_output[i].begin(), y_cls_output[i].end());
bbox_max_scores.push_back(*maxPosition);
int index=maxPosition - y_cls_output[i].begin();
bbox_max_score_classes.push_back(index);
}
vector<Rect> ans;
vector<int> keep_idxs;
single_class_non_max_suppression(y_bboxes, bbox_max_scores,ans,keep_idxs,conf_thresh,iou_thresh);
// Standardize output
vector<BBox> return_boxes;
vector<int> return_labels;
post_process_data(y_bboxes, bbox_max_scores, bbox_max_score_classes, keep_idxs,img_width,img_height,conf_thresh,return_boxes,return_labels);
std::cout<<"the value from max to min"<<std::endl;
for(int i=0;i<return_boxes.size();i++)
{
std::cout<<"[["<<return_boxes[i].rect.x1<<
", "<<return_boxes[i].rect.y1<<
", "<<return_boxes[i].rect.x2<<
", "<<return_boxes[i].rect.y2<<
", "<<return_boxes[i].conf<<"]]";
std::cout<<"[['"<<return_labels[i]<<"']]"<<std::endl;
}
return 0;
}
测试结果
F:\window10\cmake-build-debug\window10.exe
Interpreter created
session created
5972 4 1 23888
5972 2 1 11944
5972 5972 4 0
the value from max to min
[[29, 1, 103, 91, 0.997869]][['0']]
[[28, 257, 102, 343, 0.950967]][['0']]
[[372, 4, 446, 89, 0.9492]][['0']]
[[208, 4, 277, 91, 0.930239]][['0']]
[[28, 132, 104, 222, 0.928288]][['0']]
Process finished with exit code 0
模型二的代码 因为使用的pth模型转onnx, 然后onnx模型转mnn,维度是360,类型是Caffe
测试代码
#include <iostream>
#include<opencv2/core.hpp>
#include<opencv2/imgproc.hpp>
#include<opencv2/highgui.hpp>
#include<MNN/Interpreter.hpp>
#include<MNN/ImageProcess.hpp>
using namespace std;
using namespace MNN;
// Axis-aligned box: (x1, y1) top-left, (x2, y2) bottom-right; zero-initialized.
struct Rect {
    float x1 = 0;
    float y1 = 0;
    float x2 = 0;
    float y2 = 0;
    Rect() = default;
    Rect(float x1_, float y1_, float x2_, float y2_) : x1(x1_), y1(y1_), x2(x2_), y2(y2_) {}
};
// One detection: original anchor index, confidence score, and its box.
struct BBox {
    int idx = 0;
    float conf = 0;
    Rect rect;
    BBox() = default;
    BBox(int idx_, float conf_, Rect rect_) : idx(idx_), conf(conf_), rect(rect_) {}
};
using namespace std;
// Build SSD-style prior (anchor) boxes for every cell of every feature map.
// feature_map_sizes[i] = {cells_x, cells_y} of map i; each cell gets
// |anchor_sizes[i]| + |anchor_ratios[i]| - 1 anchors (all sizes at ratio[0],
// plus size[0] at each additional ratio). Appends one normalized corner-form
// Rect per anchor to anchor_bboxes and returns the total box count.
int generate_anchors( vector<vector<int>> feature_map_sizes , vector<vector<float>> anchor_sizes , vector<vector<float>> anchor_ratios, vector<Rect> &anchor_bboxes)
{
    for(int i=0;i<feature_map_sizes.size();i++)
    {
        vector<float> vec_cx;
        vector<float> vec_cy;
        vector<vector<float>> center_tiled;
        // Cell-center coordinates normalized to [0, 1] on each axis.
        // Bug fix: cy must iterate over the second dimension; the original
        // used feature_map_sizes[i][0] for both (only correct for square maps).
        for(int j=0;j<feature_map_sizes[i][0];j++)
        {
            vec_cx.push_back((j+0.5)/(float)feature_map_sizes[i][0]);
        }
        for(int j=0;j<feature_map_sizes[i][1];j++)
        {
            vec_cy.push_back((j+0.5)/(float)feature_map_sizes[i][1]);
        }
        int num_anchors=anchor_sizes[i].size()+anchor_ratios[i].size()-1;
        // Tile each cell center 2*num_anchors times so the per-anchor corner
        // offsets below can be added element-wise.
        for(int m=0;m<vec_cy.size();m++){
            for(int n=0;n<vec_cx.size();n++)
            {
                vector<float> center_tiled_item;
                for(int p=0;p<2*num_anchors;p++){
                    center_tiled_item.push_back(vec_cx[n]);
                    center_tiled_item.push_back(vec_cy[m]);
                }
                center_tiled.push_back(center_tiled_item);
            }
        }
        // Corner offsets (-w/2, -h/2, +w/2, +h/2) for each anchor shape:
        // every size at the first ratio ...
        vector<float> anchor_width_heights;
        for(int j=0;j<anchor_sizes[i].size();j++) {
            float scale = anchor_sizes[i][j];
            float ratio = anchor_ratios[i][0];
            float width = scale * sqrt(ratio);
            float height = scale / sqrt(ratio);
            anchor_width_heights.push_back(-width / 2.0);
            anchor_width_heights.push_back(-height / 2.0);
            anchor_width_heights.push_back(width / 2.0);
            anchor_width_heights.push_back(height / 2.0);
        }
        // ... plus the first size at every remaining ratio.
        for(int j=1;j<anchor_ratios[i].size();j++) {
            float s1 = anchor_sizes[i][0];
            float width = s1 * sqrt(anchor_ratios[i][j]);
            float height = s1 / sqrt(anchor_ratios[i][j]);
            anchor_width_heights.push_back(-width / 2.0);
            anchor_width_heights.push_back(-height / 2.0);
            anchor_width_heights.push_back(width / 2.0);
            anchor_width_heights.push_back(height / 2.0);
        }
        // Center + offset -> absolute (x1, y1, x2, y2) for every anchor.
        for(int x=0;x<center_tiled.size();x++)
        {
            for(int y=0;y< center_tiled[x].size();y+=4)
            {
                center_tiled[x][y] =center_tiled[x][y]+anchor_width_heights[y];
                center_tiled[x][y+1] =center_tiled[x][y+1]+anchor_width_heights[y+1];
                center_tiled[x][y+2] =center_tiled[x][y+2]+anchor_width_heights[y+2];
                center_tiled[x][y+3] =center_tiled[x][y+3]+anchor_width_heights[y+3];
                anchor_bboxes.push_back(Rect(center_tiled[x][y],center_tiled[x][y+1],center_tiled[x][y+2],center_tiled[x][y+3]));
            }
        }
    }
    // Bug fix: the function is declared int but had no return statement
    // (flowing off the end of a value-returning function is UB in C++).
    return (int)anchor_bboxes.size();
}
void decode_bbox(vector<Rect> anchors,vector<vector<float>> raw_outputs,vector<float> variances,vector<Rect> &predict_bbox){
for(int i=0;i<anchors.size();i++)
{
float anchor_centers_x = (anchors[i].x1+anchors[i].x2) / 2;
float anchor_centers_y = (anchors[i].y1+anchors[i].y2) / 2;
float anchors_w = anchors[i].x2 - anchors[i].x1;
float anchors_h = anchors[i].y2 - anchors[i].y1;
float raw_outputs_rescale_0=raw_outputs[i][0]*variances[0];
float raw_outputs_rescale_1=raw_outputs[i][1]*variances[1];
float raw_outputs_rescale_2=raw_outputs[i][2]*variances[2];
float raw_outputs_rescale_3=raw_outputs[i][3]*variances[3];
float predict_center_x = raw_outputs_rescale_0* anchors_w + anchor_centers_x;
float predict_center_y = raw_outputs_rescale_1 * anchors_h + anchor_centers_y;
float predict_w = exp(raw_outputs_rescale_2) * anchors_w;
float predict_h = exp(raw_outputs_rescale_3) * anchors_h;
float predict_xmin = predict_center_x - predict_w / 2;
float predict_ymin = predict_center_y - predict_h / 2;
float predict_xmax = predict_center_x + predict_w / 2;
float predict_ymax = predict_center_y + predict_h / 2;
predict_bbox.push_back(Rect(predict_xmin,predict_ymin,predict_xmax,predict_ymax));
}
}
// Intersection-over-Union of two boxes. Note: no guard against a zero
// union area; degenerate boxes divide by zero, matching the original.
float iou(Rect& r1, Rect& r2) {
    const float ix1 = max(r1.x1, r2.x1);
    const float iy1 = max(r1.y1, r2.y1);
    const float ix2 = min(r1.x2, r2.x2);
    const float iy2 = min(r1.y2, r2.y2);
    const float inter_w = max(0.0f, ix2 - ix1);
    const float inter_h = max(0.0f, iy2 - iy1);
    const float inter = inter_h * inter_w;
    const float area_a = (r1.x2 - r1.x1) * (r1.y2 - r1.y1);
    const float area_b = (r2.x2 - r2.x1) * (r2.y2 - r2.y1);
    return inter / (area_a + area_b - inter);
}
// Greedy single-class non-maximum suppression.
// Candidates are ranked by descending confidence; each kept box suppresses
// the remaining boxes whose IoU with it exceeds iou_thresh. Because the
// scan order is descending, the first candidate below conf_thresh ends the
// loop. Outputs: keep_idx = indices into `rects` (descending confidence),
// ans = the corresponding rectangles.
// Fix vs. the original: it repeatedly erased from the front of a vector,
// shifting all remaining elements each time (accidental extra O(n^2) moves);
// a suppression-flag array yields the exact same results without the moves.
void single_class_non_max_suppression(vector<Rect> rects, vector<float> confs,vector<Rect> &ans, vector<int> &keep_idx, float conf_thresh = 0.5, float iou_thresh = 0.5) {
    if (rects.empty()) {
        return;
    }
    // Pair each box with its original index and sort by descending score.
    vector<BBox> order;
    order.reserve(rects.size());
    for (int i = 0; i < (int)rects.size(); ++i) {
        order.push_back(BBox(i, confs[i], rects[i]));
    }
    sort(order.begin(), order.end(), [](const BBox& a, const BBox& b) {
        return a.conf > b.conf;
    });
    vector<char> suppressed(order.size(), 0);
    for (size_t i = 0; i < order.size(); ++i) {
        if (suppressed[i]) {
            continue;
        }
        if (order[i].conf < conf_thresh) {
            break;  // sorted descending: nothing better remains
        }
        keep_idx.emplace_back(order[i].idx);
        // Suppress every lower-ranked box that overlaps too much.
        for (size_t j = i + 1; j < order.size(); ++j) {
            if (!suppressed[j] && iou(order[i].rect, order[j].rect) > iou_thresh) {
                suppressed[j] = 1;
            }
        }
    }
    for (const int number : keep_idx) {
        ans.push_back(rects[number]);
    }
}
// Map kept detections back to original-image pixel coordinates and filter
// by confidence. keep_idxs comes from NMS and indexes into y_bboxes /
// bbox_max_scores / bbox_max_score_classes; boxes are assumed to be in
// normalized [0,1] coordinates. Appends a BBox (original index, score,
// pixel Rect) and its class label per surviving detection.
// Fix vs. the original: the `flaot` typo in the conf_thresh parameter did
// not compile.
void post_process_data(vector<Rect> y_bboxes,
                       vector<float> bbox_max_scores,
                       vector<int> bbox_max_score_classes,
                       vector<int> keep_idxs,
                       int img_width,
                       int img_height,
                       float conf_thresh,
                       vector<BBox>& return_boxes,
                       vector<int>& return_labels){
    for (size_t i = 0; i < keep_idxs.size(); i++) {
        int idx = keep_idxs[i];
        if (bbox_max_scores[idx] >= conf_thresh) {
            float score = bbox_max_scores[idx];
            // Scale to pixels (truncating via int()) and clamp to the frame.
            float x_min = max(0, int(y_bboxes[idx].x1 * img_width));
            float y_min = max(0, int(y_bboxes[idx].y1 * img_height));
            float x_max = min(int(y_bboxes[idx].x2 * img_width), img_width);
            float y_max = min(int(y_bboxes[idx].y2 * img_height), img_height);
            return_boxes.push_back(BBox(idx, score, Rect(x_min, y_min, x_max, y_max)));
            return_labels.push_back(bbox_max_score_classes[idx]);
        }
    }
}
int main() {
//cv::INTER_LINEAR为双线性插值,需要和python下的resize插值方式一致
cv::Mat img = cv::imread("F:\\untitled11\\1.png");
int target_size=360;
float conf_thresh=0.5;
float iou_thresh=0.5;
int img_width= img.cols;
int img_height=img.rows;
std::vector<float> meanVals ={ 0,0,0};;
std::vector<float> normVals= { 1.0f / 255.f,1.0f / 255.f,1.0f / 255.f};
cv::Mat img_resized;
cv::resize(img.clone(), img_resized, cv::Size(target_size, target_size));
cv::Mat img_color;
cv::cvtColor(img_resized, img_color, cv::COLOR_BGR2RGB);
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile("F:\\untitled11\\facemask360_sim_from_onnx.mnn"));//创建解释器
cout << "Interpreter created" << endl;
ScheduleConfig config;
config.numThread = 8;
config.type = MNN_FORWARD_CPU;
auto session = net->createSession(config);//创建session
cout << "session created" << endl;
auto inTensor = net->getSessionInput(session, NULL);
auto outTensor = net->getSessionInput(session, NULL);
auto _Tensor = MNN::Tensor::create<float>({1,3,target_size,target_size}, NULL, MNN::Tensor::CAFFE);
if(_Tensor->elementSize()!=3*target_size*target_size)
{
std::cout<<_Tensor->elementSize()<<" "<<img_color.channels()*img_color.cols*img_color.rows<<std::endl;
std::cout<<"input shape not equal image shape"<<std::endl;
return -1;
}
std::vector<cv::Mat> rgbChannels(3);
cv::split(img_color, rgbChannels);
for (auto i = 0; i < rgbChannels.size(); i++) {
rgbChannels[i].convertTo(rgbChannels[i], CV_32FC1, normVals[i], meanVals[i]);
for(int j=0;j<rgbChannels[i].rows;j++) {
for (int k = 0; k < rgbChannels[i].cols; k++) {
_Tensor->host<float>()[i*target_size*target_size+j*target_size+k] =rgbChannels[i].at<float>(j, k);
}
}
}
inTensor->copyFromHostTensor(_Tensor);
//推理
net->runSession(session);
auto output= net->getSessionOutput(session, "output");
//MNN::Tensor feat_tensor(output, output->getDimensionType());
//output->copyToHostTensor(&feat_tensor);
//feat_tensor.print();
MNN::Tensor output_host(output, output->getDimensionType());
output->copyToHostTensor(&output_host);
vector<vector<int>> feature_map_sizes= {{45, 45},{ 23, 23}, { 12, 12}, { 6, 6}, { 4, 4}};
vector<vector<float>> anchor_sizes= {{0.04, 0.056}, {0.08, 0.11}, {0.16, 0.22}, {0.32, 0.45}, {0.64, 0.72}};
vector<vector<float>> anchor_ratios = {{1, 0.62, 0.42},{1, 0.62, 0.42},{1, 0.62, 0.42},{1, 0.62, 0.42},{1, 0.62, 0.42}};
vector<float> variances= {0.1, 0.1, 0.2, 0.2};
vector<Rect> anchor_bboxes;
//尽量写的简单和不依赖ncnn函数 因为mnn还需要使用
generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios,anchor_bboxes);
vector<vector<float>> y_bboxes_output;
vector<vector<float>> y_cls_output;
vector<Rect> y_bboxes;
vector<float> item;
auto output_ptr = output_host.host<float>();
for (int i = 0; i < output_host.elementSize(); i+=output_host.height()) {
float output0 = output_ptr[i];
float output1 = output_ptr[i+1];
float output2 = output_ptr[i+2];
float output3 = output_ptr[i+3];
item.push_back(output0);
item.push_back(output1);
item.push_back(output2);
item.push_back(output3);
y_bboxes_output.push_back(item);
item.clear();
vector<float>().swap(item);
}
output= net->getSessionOutput(session, "prob");
MNN::Tensor score_host(output, output->getDimensionType());
output->copyToHostTensor(&score_host);
auto score_ptr = score_host.host<float>();
for (int i = 0; i < score_host.elementSize(); i+=score_host.height()) {
float score0 = score_ptr[i];
float score1 = score_ptr[i+1];
item.push_back(score0);
item.push_back(score1);
y_cls_output.push_back(item);
item.clear();
vector<float>().swap(item);
}
decode_bbox(anchor_bboxes, y_bboxes_output,variances,y_bboxes);
// To speed up, do single class NMS, not multiple classes NMS.
vector<float> bbox_max_scores;
vector<int> bbox_max_score_classes;
for(int i=0;i<y_cls_output.size();i++)
{
auto maxPosition = max_element(y_cls_output[i].begin(), y_cls_output[i].end());
bbox_max_scores.push_back(*maxPosition);
int index=maxPosition - y_cls_output[i].begin();
bbox_max_score_classes.push_back(index);
}
vector<Rect> ans;
vector<int> keep_idxs;
single_class_non_max_suppression(y_bboxes, bbox_max_scores,ans,keep_idxs,conf_thresh,iou_thresh);
// Standardize output
vector<BBox> return_boxes;
vector<int> return_labels;
post_process_data(y_bboxes, bbox_max_scores, bbox_max_score_classes, keep_idxs,img_width,img_height,conf_thresh,return_boxes,return_labels);
for(int i=0;i<return_boxes.size();i++)
{
std::cout<<"[["<<return_boxes[i].rect.x1<<
", "<<return_boxes[i].rect.y1<<
", "<<return_boxes[i].rect.x2<<
", "<<return_boxes[i].rect.y2<<
", "<<return_boxes[i].conf<<"]]";
std::cout<<"[['"<<return_labels[i]<<"']]"<<std::endl;
}
return 0;
}
模型二的测试结果
F:\window10\cmake-build-debug\window10.exe
Interpreter created
session created
[[102, 16, 318, 266, 1]][['0']]
Process finished with exit code 0