Basic idea: At the end of June I had some free time and wanted to fulfill a small ambition of my own: deploy an instance segmentation model. I looked up a few papers and tested both YOLACT and OrienMask. I had long wanted to study how instance segmentation works, because the video-matting person segmentation my employer used was unsatisfactory — white clothing and the background were constantly split apart — and converting that model to C++ is quite involved. So I decided to switch to an instance segmentation model instead; since there was a real need, I started writing.
Model files — link: https://pan.baidu.com/s/1W0mP2VLwoKqV5aOkuL-a3w extraction code: jvl7
Step 1: Download the code for training and set up the environment according to install.md
ubuntu@ubuntu:~/Downloads$ git clone https://github.com/duwt/OrienMask.git
Cloning into 'OrienMask'...
remote: Enumerating objects: 81, done.
remote: Counting objects: 100% (81/81), done.
remote: Compressing objects: 100% (71/71), done.
remote: Total 81 (delta 11), reused 78 (delta 8), pack-reused 0
Unpacking objects: 100% (81/81), 772.21 KiB | 1.58 MiB/s, done.
Install the environment dependencies.
Build and install ninja.
Test the .pth model's result and output image:
ubuntu@ubuntu:~/OrienMask$ python3 infer.py -c orienmask_yolo_coco_544_anchor4_fpn_plus_infer -w checkpoints/OrienMaskAnchor4FPNPlus/orienmask_yolo.pth -i assets/000000163126.jpg -v -o outputs
Step 2: Convert the model to ONNX
1) Modify the source directly, so the converted model can be dropped into ncnn and MNN as-is with no need to patch ONNX operators (alternatively you could edit the ncnn param file, or define an MNN config to extract intermediate results). Someone has already published an ONNX conversion — it is quite simple — but without the post-processing code; since I needed it quickly and wanted the practice, I wrote my own.
Because the original post-processing involves an interpolation (resampling) step, I fold it into the model so that writing the post-processing logic becomes simple. Modify /home/ubuntu/OrienMask/model/orienmask_yolo_fpnplus.py. This only changes model export; if you want training to work with it too, adjust that yourself — it is straightforward. The original mask computation also contains a later resampling step; it can be handled by registering a custom op with ncnn (I tried it — inference results were correct), but it is unnecessary: you can apply the mask directly on the target cv::Mat, and a single resize of that Mat back to the original image size at the end does the job.
The original code:
orien32, orien16, orien8 = torch.split(oriens, self.num_anchors * 2, dim=1)
return (bbox32, orien32), (bbox16, orien16), (bbox8, orien8)
Modify it as follows; this does not affect inference with infer.py:
predict = ((bbox32, orien32), (bbox16, orien16), (bbox8, orien8))
import torch.nn.functional as F
pred_orien_batch = [
    F.interpolate(predict_i[1], scale_factor=4.0, mode='bilinear', align_corners=False)
    for predict_i in predict
]
return (bbox32, orien32), (bbox16, orien16), (bbox8, orien8), pred_orien_batch
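To see what the folded-in interpolation produces, here is a minimal standalone sketch (my own illustration, not repo code): at scale_factor=4.0 each orientation map comes out at the 544×544 network input size, so the exported ONNX already carries full-resolution orientation maps (6 channels = 3 anchors × 2) and the C++ side needs no interpolate op of its own.
import torch
import torch.nn.functional as F
orien = torch.randn(1, 6, 136, 136)  # one orientation head at input/4 resolution (544 / 4 = 136)
orien_full = F.interpolate(orien, scale_factor=4.0, mode='bilinear', align_corners=False)
print(orien_full.shape)  # torch.Size([1, 6, 544, 544])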
Also change the apply function the same way; it lives in the eval module under /home/ubuntu/OrienMask/. The original:
def apply(self, predict):
    pred_bbox_batch = [predict_i[0] for predict_i in predict]
    pred_orien_batch = [
        F.interpolate(predict_i[1], scale_factor=4.0, mode='bilinear', align_corners=False)
        for predict_i in predict
    ]
After the modification, add an if statement so that ONNX inference and .pth inference take different paths:
def apply(self, predict, pred_bbox_batch=None, pred_orien_batch=None):
    nB = predict[0][0].size(0)
    if pred_bbox_batch:
        print("execute onnx infer")
        pass
    else:
        print("execute pt infer")
        pred_bbox_batch = [predict_i[0] for predict_i in predict]
        pred_orien_batch = [
            F.interpolate(predict_i[1], scale_factor=4.0, mode='bilinear', align_corners=False)
            for predict_i in predict
        ]
Export and test with the following script:
import matplotlib.pyplot as plt
from torch.autograd import Variable
from argparse import ArgumentParser
import torch
import torch.utils.data
import onnxruntime
import cv2
import numpy as np
from onnxruntime.datasets import get_example
import torch.nn.functional as F
import math
from model.orienmask_yolo_fpnplus import OrienMaskYOLOFPNPlus
from utils.visualizer import InferenceVisualizer
from torch.nn.modules.utils import _pair
from eval("cuda:0" if torch.cuda.is_available() else "cpu")
import os
envpath = '/home/ubuntu/.local/lib/python3.8/site-packages/cv2/qt/plugins/platforms'
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = envpath
def pad(image, size_divisor=32, pad_value=0):
    height, width = image.shape[-2:]
    new_height = int(math.ceil(height / size_divisor) * size_divisor)
    new_width = int(math.ceil(width / size_divisor) * size_divisor)
    pad_left, pad_top = (new_width - width) // 2, (new_height - height) // 2
    pad_right, pad_down = new_width - width - pad_left, new_height - height - pad_top
    padding = [pad_left, pad_right, pad_top, pad_down]
    image = F.pad(image, padding, value=pad_value)
    pad_info = padding + [new_height, new_width]
    return image, pad_info
def torch2onnx(args, model):
    img_src = cv2.imread(args.img)
    img_color = cv2.cvtColor(img_src, cv2.COLOR_BGR2RGB)
    src_tensor = torch.tensor(img_color, device=device, dtype=torch.float32)
    img_resize = cv2.resize(img_color, (544, 544), interpolation=cv2.INTER_LINEAR)
    input = np.transpose(img_resize, (2, 0, 1)).astype(np.float32)
    input[0, ...] = (input[0, ...] - 0) / 255  # per-channel normalization: no mean shift, scale by 1/255
    input[1, ...] = (input[1, ...] - 0) / 255
    input[2, ...] = (input[2, ...] - 0) / 255
    now_image = Variable(torch.from_numpy(input))
    dummy_input = now_image.unsqueeze(0).to(device)
    dummy_input, pad_info = pad(dummy_input)
    torch.onnx.export(model, dummy_input, args.onnx_model_path, input_names=["input"],
                      export_params=True,
                      keep_initializers_as_inputs=True,
                      do_constant_folding=True,
                      verbose=False,
                      opset_version=11)
    example_model = get_example(args.onnx_model_path)
    session = onnxruntime.InferenceSession(example_model)
    input_name = session.get_inputs()[0].name
    result = session.run([], {input_name: dummy_input.data.cpu().numpy()})
    result_tuple = ((torch.tensor(result[0], device=device), torch.tensor(result[1], device=device)),
                    (torch.tensor(result[2], device=device), torch.tensor(result[3], device=device)),
                    (torch.tensor(result[4], device=device), torch.tensor(result[5], device=device)))
    pred_bbox_batch = [torch.tensor(result[0], device=device), torch.tensor(result[2], device=device),
                       torch.tensor(result[4], device=device)]
    pred_orien_batch = [torch.tensor(result[6], device=device), torch.tensor(result[7], device=device),
                        torch.tensor(result[8], device=device)]
    self_grid_size = [[17, 17], [34, 34], [68, 68]]
    self_image_size = [544, 544]
    self_anchors = [[12, 16], [19, 36], [40, 28], [36, 75], [76, 55], [72, 146], [142, 110], [192, 243], [459, 401]]
    self_anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    self_num_classes = 80
    self_conf_thresh = 0.05
    self_nms_func = None
    self_nms_pre = 400
    self_nms_post = 100
    self_orien_thresh = 0.3
    item_Orien = OrienMaskYOLOPostProcess(self_grid_size, self_image_size, self_anchors, self_anchor_mask,
                                          self_num_classes, self_conf_thresh, self_nms_func, self_nms_pre,
                                          self_nms_post, self_orien_thresh, device)
    predictions = item_Orien.apply(result_tuple, pred_bbox_batch, pred_orien_batch)
    dataset = 'COCO'
    with_mask = True
    conf_thresh = 0.3
    alpha = 0.6
    line_thickness = 1
    ifer_item = InferenceVisualizer(dataset, device, with_mask, conf_thresh, alpha, line_thickness)
    show_image = ifer_item(predictions[0], src_tensor, pad_info)
    plt.imsave(args.onnxoutput, show_image)
def main():
    """Test a single image."""
    parser = ArgumentParser()
    parser.add_argument('--img', default="/home/ubuntu/OrienMask/assets/000000163126.jpg",
                        help='Image file')
    parser.add_argument('--weights', default="/home/ubuntu/OrienMask/checkpoints/OrienMaskAnchor4FPNPlus/orienmask_yolo.pth",
                        help='Checkpoint file')
    parser.add_argument('--onnx_model_path',
                        default="/home/ubuntu/OrienMask/checkpoints/OrienMaskAnchor4FPNPlus/orienmask_yolo.onnx",
                        help='onnx_model_path')
    parser.add_argument('--device', default='cuda:0', help='Device used for inference')
    parser.add_argument('--onnxoutput', default=r'onnxsxj731533730.jpg', help='Output image')
    parser.add_argument('--num_anchors', type=int, default=3, help='num_anchors')
    parser.add_argument('--num_classes', type=int, default=80, help='num_classes')
    args = parser.parse_args()
    model = OrienMaskYOLOFPNPlus(args.num_anchors, args.num_classes).to(device)
    weights = torch.load(args.weights, map_location=device)
    weights = weights['state_dict'] if 'state_dict' in weights else weights
    model.load_state_dict(weights, strict=True)
    torch2onnx(args, model)


if __name__ == '__main__':
    main()
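One sanity check worth noting: for a 544×544 input, pad() is a no-op — 544 is already a multiple of 32 — so pad_info comes back as [0, 0, 0, 0, 544, 544], which is exactly the constant pad_info hard-coded in the C++ demos below. A quick standalone check, assuming the pad() defined above is in scope:
import torch
dummy = torch.zeros(1, 3, 544, 544)
_, pad_info = pad(dummy)
print(pad_info)  # [0, 0, 0, 0, 544, 544]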
The exported ONNX model:
There is no need to compare raw tensors between the converted ONNX model and PyTorch: the generated result images are identical, which shows the data is correct.
Step 3: Convert the model with ncnn and write the post-processing; nothing difficult here.
ubuntu@ubuntu:~/OrienMask/checkpoints/OrienMaskAnchor4FPNPlus$ python3 -m onnxsim orienmask_yolo.onnx orienmask_yolo_sim.onnx
Simplifying...
Finish! Here is the difference:
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓
┃ ┃ Original Model ┃ Simplified Model ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩
│ Add │ 23 │ 23 │
│ Concat │ 3 │ 3 │
│ Constant │ 8 │ 0 │
│ Conv │ 90 │ 90 │
│ LeakyRelu │ 86 │ 86 │
│ Resize │ 8 │ 8 │
│ Split │ 1 │ 1 │
│ Model Size │ 242.8MiB │ 242.8MiB │
└────────────┴────────────────┴──────────────────┘
ubuntu@ubuntu:~/ncnn/build/install/bin$ ./onnx2ncnn /home/ubuntu/OrienMask/orienmask_yolo_sim.onnx /home/ubuntu/OrienMask/orienmask_yolo_sim.param /home/ubuntu/OrienMask/orienmask_yolo_sim.bin
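A note on the blob names used below: "770", "774" … "855" are simply the ONNX graph's output node ids, and they also appear in the generated .param file (or in Netron). If in doubt, a short script like this lists them — assuming the onnx Python package is installed:
import onnx
model = onnx.load("/home/ubuntu/OrienMask/orienmask_yolo_sim.onnx")
print([o.name for o in model.graph.output])
# prints ['770', '774', '778', '838', '839', '840', '845', '850', '855'] for this export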
Step 4: Write the instance segmentation code.
Along the way I hit a small hiccup while debugging in PyCharm; the error was:
QObject::moveToThread: Current thread (0x1b16960) is not the object's thread (0x73a4530).
Cannot move to target thread (0x1b16960)
qt.qpa.plugin: Could not load the Qt platform plugin "xcb" in "/home/ubuntu/.local/lib/python3.8/site-packages/cv2/qt/plugins" even though it was found.
This application failed to start because no Qt platform plugin could be initialized. Reinstalling the application may fix this problem.
Available platform plugins are: xcb, eglfs, linuxfb, minimal, minimalegl, offscreen, vnc, wayland-egl, wayland, wayland-xcomposite-egl, wayland-xcomposite-glx, webgl.
Adding the following lines to the script being debugged makes debugging work normally:
import os
envpath = '/home/ubuntu/.local/lib/python3.8/site-packages/cv2/qt/plugins/platforms'
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = envpath
1) ncnn version
CMakeLists.txt
cmake_minimum_required(VERSION 3.16)
project(Orienmask_project)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp ")
set(CMAKE_CXX_STANDARD 11)
include_directories(${CMAKE_SOURCE_DIR}/include)
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
# import ncnn
add_library(libncnn STATIC IMPORTED)
set_target_properties(libncnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libncnn.a)
add_executable(Orienmask_project main.cpp orienmask_yolo_postprocess.cpp orienmask_yolo_postprocess.h)
target_link_libraries(Orienmask_project ${OpenCV_LIBS} libncnn)
main.cpp
#include "orienmask_yolo_postprocess.h"
void get_result_data(ncnn::Mat result, std::vector<float> &vec_result) {
for (int q = 0; q < result.c; q++) {
const float *ptr = result.channel(q);
for (int z = 0; z < result.d; z++) {
for (int y = 0; y < result.h; y++) {
for (int x = 0; x < result.w; x++) {
vec_result.push_back(ptr[x]);
}
ptr += result.w;
}
}
}
}
void InferenceVisualizer(std::vector<std::vector<float>> bbox_dets,
std::vector<std::vector<char>> mask_masks,
std::vector<int> cls_cats,
cv::Mat img_dst,
bool with_mask,
float confidence,
std::vector<int> pad_info,
std::vector<std::string> classes,
int img_w,
int img_h,
std::vector<cv::Scalar> PALETTE
) {
int left = pad_info[0];
int right = pad_info[1];
int top = pad_info[2];
int down = pad_info[3];
int h = pad_info[4];
int w = pad_info[5];
int nh = h - top - down;
int nw = w - left - right;
std::vector<std::vector<int>> mask_dets;
std::vector<cv::Scalar> color;
int length = bbox_dets.size();
for (int i = 0; i < length; i++) {
float conf = bbox_dets[i][4];
if (conf > confidence) {
int colors_idx = length * 5 + rand() % PALETTE.size();
int colors_real = colors_idx % PALETTE.size();
if (with_mask) {
for (int y = 0; y < img_dst.rows; y++) {
uchar *p = img_dst.ptr(y);
for (int x = 0; x < img_dst.cols; x++) {
if (mask_masks[i][y * img_dst.cols + x]) {
p[0] = cv::saturate_cast<uchar>(p[0] * 0.5 + PALETTE[colors_real][0] * 0.5);
p[1] = cv::saturate_cast<uchar>(p[1] * 0.5 + PALETTE[colors_real][1] * 0.5);
p[2] = cv::saturate_cast<uchar>(p[2] * 0.5 + PALETTE[colors_real][2] * 0.5);
}
p += 3;
}
}
}
float bx = (bbox_dets[i][0] * w - left) / nw;
float by = (bbox_dets[i][1] * h - top) / nh;
float bw = bbox_dets[i][2] * w / nw;
float bh = bbox_dets[i][3] * h / nh;
int bx1 = ceil((bx - bw / 2) * w); // changed: use the padded size w/h instead of the real image size img_w/img_h, so the original upsampling function is not needed — one final image resize suffices
int by1 = ceil((by - bh / 2) * h); // to compare against the Python output, use img_w/img_h here instead
int bx2 = ceil((bx + bw / 2) * w);
int by2 = ceil((by + bh / 2) * h);
cv::rectangle(img_dst, cv::Point(bx1, by1), cv::Point(bx2, by2), PALETTE[colors_real]); // two-corner form
char text[256];
sprintf(text, "%s %.1f%%", classes[cls_cats[i]].c_str(), conf * 100);
int baseLine = 0;
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
int x = bx1;
int y = by1 - label_size.height - baseLine;
if (y < 0)
y = 0;
if (x + label_size.width > img_dst.cols)
x = img_dst.cols - label_size.width;
cv::rectangle(img_dst, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
PALETTE[colors_real], -1);
cv::putText(img_dst, text, cv::Point(x, y + label_size.height),
cv::FONT_HERSHEY_DUPLEX, 0.4, cv::Scalar(255, 255, 255), 1, cv::LINE_AA);
}
}
cv::resize(img_dst, img_dst, cv::Size(img_w, img_h), 0, 0, cv::INTER_LINEAR); // replaces the resampling step removed from the original post-processing
//cv::imshow("demo", img_dst);
cv::imwrite("ncnn.jpg", img_dst);
//cv::waitKey(0);
}
int main() {
std::vector<std::string> classes = {
"person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
"boat", "traffic-light", "fire-hydrant", "stop-sign", "parking-meter", "bench",
"bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
"giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
"snowboard", "sports-ball", "kite", "baseball-bat", "baseball-glove", "skateboard",
"surfboard", "tennis-racket", "bottle", "wine-glass", "cup", "fork", "knife",
"spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
"hot-dog", "pizza", "donut", "cake", "chair", "sofa", "potted-plant", "bed",
"dining-table", "toilet", "tv-monitor", "laptop", "mouse", "remote", "keyboard",
"cell-phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book",
"clock", "vase", "scissors", "teddy-bear", "hair-drier", "toothbrush"
};
std::vector<std::vector<int>> vec_grid_size = {{17, 17},
{34, 34},
{68, 68}};
std::vector<std::vector<int>> vec_anchors = {{12, 16},
{19, 36},
{40, 28},
{36, 75},
{76, 55},
{72, 146},
{142, 110},
{192, 243},
{459, 401}};
std::vector<std::vector<int>> vec_anchor_mask = {{6, 7, 8},
{3, 4, 5},
{0, 1, 2}};
std::vector<cv::Scalar> PALETTE = {
cv::Scalar(233, 30, 99),
cv::Scalar(255, 235, 59),
cv::Scalar(255, 193, 7),
cv::Scalar(255, 152, 0),
cv::Scalar(255, 87, 34),
cv::Scalar(121, 85, 72),
cv::Scalar(158, 158, 158),
cv::Scalar(96, 125, 139),
cv::Scalar(156, 39, 176),
cv::Scalar(103, 58, 183),
cv::Scalar(63, 81, 181),
cv::Scalar(33, 150, 243),
cv::Scalar(3, 169, 244),
cv::Scalar(0, 188, 212),
cv::Scalar(0, 150, 136),
cv::Scalar(76, 175, 80),
cv::Scalar(139, 195, 74),
cv::Scalar(205, 220, 57),
cv::Scalar(244, 67, 54)
};
int num_classes = 80;
float conf_thresh = 0.05;
int nms_pre = 400;
int nms_post = 100;
float orien_thresh = 0.3;
int target_size = 544;
std::vector<int> vec_image_size = {target_size, target_size};
std::vector<int> pad_info={0,0,0,0,target_size, target_size};
OrienMaskYOLOPostProcess *item_Orien = new OrienMaskYOLOPostProcess(vec_grid_size, vec_image_size, vec_anchors,
vec_anchor_mask, num_classes,
conf_thresh, nms_pre,
nms_post, orien_thresh);
cv::Mat img_src = cv::imread("/home/ubuntu/OrienMask/assets/000000163126.jpg");
int img_w=img_src.cols;
int img_h=img_src.rows;
cv::Mat img_dst;
cv::resize(img_src, img_dst, cv::Size(target_size, target_size),0,0,cv::INTER_LINEAR);
ncnn::Net net;
net.load_param("../model/orienmask_yolo_sim.param");
net.load_model("../model/orienmask_yolo_sim.bin");
auto start = std::chrono::high_resolution_clock::now(); // start time
ncnn::Mat in = ncnn::Mat::from_pixels(img_dst.data, ncnn::Mat::PIXEL_BGR2RGB, img_dst.cols, img_dst.rows);
const float norm_255[3] = {1 / 255.0f, 1 / 255.0f, 1 / 255.0f};
// in.substract_mean_normalize(mean_vals, std_vals);
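// normalization matches the Python preprocessing: (pixel - 0) / 255 per channel, no mean subtraction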
in.substract_mean_normalize(0, norm_255);
ncnn::Extractor ex = net.create_extractor();
ex.input("input", in);
ncnn::Mat result_0;
ncnn::Mat result_1;
ncnn::Mat result_2;
ncnn::Mat result_3;
ncnn::Mat result_4;
ncnn::Mat result_5;
std::vector<float> vec_result_0;
std::vector<float> vec_result_1;
std::vector<float> vec_result_2;
std::vector<float> vec_result_3;
std::vector<float> vec_result_4;
std::vector<float> vec_result_5;
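// Of the nine ONNX outputs only six are consumed: the bbox heads (770/774/778)
// and the 4x-upsampled orientation heads (845/850/855); the raw pre-upsample
// orien outputs (838/839/840) are not needed.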
ex.extract("770", result_0);
printf("result_0 shape: %d %d %d %d %d\n", result_0.dims, result_0.d, result_0.c, result_0.h, result_0.w);
get_result_data(result_0, vec_result_0);
ex.extract("774", result_1);
printf("result_1 shape: %d %d %d %d %d\n", result_1.dims, result_1.d, result_1.c, result_1.h, result_1.w);
get_result_data(result_1, vec_result_1);
ex.extract("778", result_2);
printf("result_2 shape: %d %d %d %d %d\n", result_2.dims, result_2.d, result_2.c, result_2.h, result_2.w);
get_result_data(result_2, vec_result_2);
ex.extract("845", result_3);
printf("result_6 shape: %d %d %d %d %d\n", result_3.dims, result_3.d, result_3.c, result_3.h, result_3.w);
get_result_data(result_3, vec_result_3);
ex.extract("850", result_4);
printf("result_7 shape: %d %d %d %d %d\n", result_4.dims, result_4.d, result_4.c, result_4.h, result_4.w);
get_result_data(result_4, vec_result_4);
ex.extract("855", result_5);
printf("result_8 shape: %d %d %d %d %d\n", result_5.dims, result_5.d, result_5.c, result_5.h, result_5.w);
get_result_data(result_5, vec_result_5);
auto end = std::chrono::high_resolution_clock::now(); // end time
auto duration = (end - start).count();
std::cout << "程序运行时间:" << std::setprecision(10) << duration / 1000000000.0 << "s"
<< std::endl;
// the post-processing avoids depending on ncnn as much as possible, since it is reused for MNN
std::vector<std::vector<char>> mask_masks;
std::vector<std::vector<float>> bbox_dets;
std::vector<int> cls_cats;
item_Orien->apply(vec_result_0, vec_result_1, vec_result_2,
vec_result_3,
vec_result_4, vec_result_5, bbox_dets, mask_masks, cls_cats);
bool with_mask = true;
float confidence = 0.3f;
InferenceVisualizer(bbox_dets, mask_masks, cls_cats, img_dst, with_mask, confidence, pad_info, classes,img_w,img_h,
PALETTE);
auto end0 = std::chrono::high_resolution_clock::now(); // end time
auto duration0 = (end0 - start).count();
std::cout << "elapsed time: " << std::setprecision(10) << duration0 / 1000000000.0 << "s"
<< std::endl;
delete item_Orien;
return 0;
}
orienmask_yolo_postprocess.h
//
// Created by ubuntu on 2022/7/14.
//
#ifndef UNTITLED3_ORIENMASK_YOLO_POSTPROCESS_H
#define UNTITLED3_ORIENMASK_YOLO_POSTPROCESS_H
#include <iostream>
#include <vector>
#include <cmath>
#include <algorithm>
#include <omp.h>
#include <iostream>
#include <numeric>
#include<opencv2/opencv.hpp>
#include "chrono"
#include <opencv2/highgui/highgui.hpp>
#include "ncnn/net.h"
struct Rect {
float x;
float y;
float width;
float height;
Rect() : x(0), y(0), width(0), height(0) {}
Rect(float x, float y, float width, float height) : x(x), y(y), width(width), height(height) {}
};
struct BBox {
int label;
float conf;
int index;
Rect rect;
BBox() : label(0), conf(0), index(0), rect(0, 0, 0, 0) {}
BBox(int label, float conf, int index, Rect rect) : label(label), conf(conf), index(index), rect(rect) {}
};
class OrienMaskYOLOPostProcess {
public:
OrienMaskYOLOPostProcess(std::vector<std::vector<int>> vec_grid_size,
std::vector<int> vec_image_size,
std::vector<std::vector<int>> vec_anchors,
std::vector<std::vector<int>> vec_anchor_mask,
int num_classes,
float conf_thresh,
int nms_pre,
int nms_post,
float orien_thresh);
~OrienMaskYOLOPostProcess();
public:
void apply(std::vector<float> vec_result_0,
std::vector<float> vec_result_1,
std::vector<float> vec_result_2,
std::vector<float> vec_result_6,
std::vector<float> vec_result_7,
std::vector<float> vec_result_8,
std::vector<std::vector<float>> &box_dets,
std::vector<std::vector<char>> &mask_masks,
std::vector<int> &cls_cats);
private:
void get_boxes(std::vector<float> vec_pred_bbox,
int dim_0,
int item_nHs,
int item_nWs,
std::vector<std::vector<float>> anchors,
std::vector<int> item_grid_x,
std::vector<int> item_grid_y,
std::vector<std::vector<float>> &vec_pred_coord,
std::vector<std::vector<float>> &vec_pred_conf);
float sigmoid(float num);
void get_orien_grid(std::vector<std::vector<float>> dets_orien, std::vector<std::vector<float>> &result_dets_orien);
void transpose_dim1_dim2(int dim_0, int dim_2, int dim_3, std::vector<float> bbox_batch,
std::vector<float> &swap_bbox_batch);
void transpose_dim2_dim3(int dim_0, int dim_2, int dim_3, std::vector<float> bbox_batch,
std::vector<float> &swap_bbox_batch);
void multi_class_nms(std::vector<BBox> result_dets_bbox,
std::vector<int> result_dets_anchor_idx,
std::vector<std::vector<float>> result_dets_orien,
std::vector<std::vector<float>> &box_dets,
std::vector<std::vector<char>> &mask_masks,
std::vector<int> &cls_cats);
void nms_sorted_bboxes(const std::vector<BBox> &objects, std::vector<int> &picked, float nms_threshold,
bool agnostic = false);
float intersection_area(BBox a, BBox b);
void qsort_descent_inplace(std::vector<BBox> &bjects);
void qsort_descent_inplace(std::vector<BBox> &objects, int left, int right);
private:
std::vector<int> m_nHs, m_nWs;
int m_scales;
float m_image_h;
float m_image_w;
std::vector<std::vector<int>> m_pixel_anchors;
std::vector<std::vector<float>> m_normalized_anchors;
std::vector<std::vector<float>> m_grid_anchors;
std::vector<std::vector<float>> m_grid_sizes;
std::vector<int> m_num_anchors;
int m_num_classes;
float m_conf_thresh;
int m_nms_pre;
int m_nms_post;
float m_orien_thresh;
std::vector<std::vector<int>> m_grid_y;
std::vector<std::vector<int>> m_grid_x;
std::vector<std::vector<int>> m_anchor_idx;
std::vector<std::vector<float>> m_base_xy;
std::vector<std::vector<int>> m_anchor_mask;
std::vector<int> m_dets_grid_x;
std::vector<int> m_dets_grid_y;
std::vector<int> m_dets_anchor_idx;
};
#endif //UNTITLED3_ORIENMASK_YOLO_POSTPROCESS_H
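One point that makes the implementation below easier to follow: get_boxes() uses the standard YOLO box decode. A small Python sketch of the per-cell math (my own paraphrase; tx/ty/tw/th are raw network outputs, gx/gy the cell indices, aw/ah the normalized anchor sizes):
import math
def decode(tx, ty, tw, th, gx, gy, nW, nH, aw, ah):
    sig = lambda v: 1.0 / (1.0 + math.exp(-v))
    bx = (sig(tx) + gx) / nW  # box center x, normalized to [0, 1]
    by = (sig(ty) + gy) / nH  # box center y
    bw = math.exp(tw) * aw    # width, in normalized image units
    bh = math.exp(th) * ah    # height
    return bx, by, bw, bh
print(decode(0.0, 0.0, 0.0, 0.0, 8, 8, 17, 17, 12 / 544, 16 / 544))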
orienmask_yolo_postprocess.cpp
//
// Created by ubuntu on 2022/7/14.
//
#include "orienmask_yolo_postprocess.h"
OrienMaskYOLOPostProcess::OrienMaskYOLOPostProcess(std::vector<std::vector<int>> vec_grid_size,
std::vector<int> vec_image_size,
std::vector<std::vector<int>> vec_anchors,
std::vector<std::vector<int>> vec_anchor_mask,
int num_classes,
float conf_thresh,
int nms_pre,
int nms_post,
float orien_thresh) {
m_anchor_mask = vec_anchor_mask;
m_orien_thresh = orien_thresh;
m_nms_post = nms_post;
m_nms_pre = nms_pre;
m_conf_thresh = conf_thresh;
m_num_classes = num_classes;
m_scales = vec_grid_size.size();
for (int i = 0; i < m_scales; i++) {
m_nHs.emplace_back(vec_grid_size[i][0]);
m_nWs.emplace_back(vec_grid_size[i][1]);
}
m_image_h = vec_image_size[0];
m_image_w = vec_image_size[1];
m_pixel_anchors = vec_anchors;
m_normalized_anchors.resize(vec_anchors.size());
for (int i = 0; i < vec_anchors.size(); i++) {
m_normalized_anchors[i].emplace_back(vec_anchors[i][0] / m_image_w);
m_normalized_anchors[i].emplace_back(vec_anchors[i][1] / m_image_h);
}
m_grid_sizes = m_normalized_anchors;
m_grid_anchors = m_normalized_anchors;
for (int i = 0; i < vec_anchor_mask.size(); i++) {
m_num_anchors.emplace_back(vec_anchor_mask[i].size());
for (int j = 0; j < vec_anchor_mask[i].size(); j++) { // TODO: optimize
m_grid_anchors[vec_anchor_mask[i][j]][0] = m_normalized_anchors[vec_anchor_mask[i][j]][0] * m_nWs[i];
m_grid_anchors[vec_anchor_mask[i][j]][1] = m_normalized_anchors[vec_anchor_mask[i][j]][1] * m_nHs[i];
m_grid_sizes[vec_anchor_mask[i][j]][0] = m_nWs[i];
m_grid_sizes[vec_anchor_mask[i][j]][1] = m_nHs[i];
}
}
m_base_xy.resize(vec_anchors.size());
std::vector<int> item_grid_y, item_grid_x;
for (int i = 0; i < vec_anchor_mask.size(); i++) {
std::vector<float> item_base_xy;
std::vector<float> item_base_y, item_base_x;
for (int m = 0; m < m_image_h; m++) {
for (int n = 0; n < m_image_w; n++) {
item_base_y.emplace_back(m / m_image_h * m_nHs[i]);
item_base_x.emplace_back(n / m_image_w * m_nWs[i]);
}
}
item_base_xy.insert(item_base_xy.end(), item_base_x.begin(), item_base_x.end());
item_base_xy.insert(item_base_xy.end(), item_base_y.begin(), item_base_y.end());
for (int j = 0; j < vec_anchor_mask[i].size(); j++) { // TODO: optimize
m_base_xy[vec_anchor_mask[i][j]].insert(m_base_xy[vec_anchor_mask[i][j]].end(), item_base_xy.begin(), item_base_xy.end());
}
std::vector<int> grid_x, grid_y;
for (int j = 0, k = 0; j < m_num_anchors[i] * m_nHs[i] * m_nWs[i]; k++, j++) {
grid_x.emplace_back(j % m_nWs[i]); // x varies fastest over the width (nW == nH for these square grids, so behavior is unchanged)
if (k % (m_nHs[i] * m_nWs[i]) == 0) k = 0;
grid_y.emplace_back(k / m_nWs[i]);
}
m_grid_x.emplace_back(grid_x);
m_grid_y.emplace_back(grid_y);
for (int j = 0; j < vec_anchor_mask[i].size(); j++) {
std::vector<int> item_anchor_idx;
for (int m = 0; m < m_nHs[i]; m++) {
for (int n = 0; n < m_nWs[i]; n++) {
item_anchor_idx.emplace_back(vec_anchor_mask[i][j]);
}
}
m_anchor_idx.emplace_back(item_anchor_idx);
}
}
for (int i = 0; i < m_grid_x.size(); i++) {
m_dets_grid_x.insert(m_dets_grid_x.end(), m_grid_x[i].begin(), m_grid_x[i].end());
}
for (int i = 0; i < m_grid_y.size(); i++) {
m_dets_grid_y.insert(m_dets_grid_y.end(), m_grid_y[i].begin(), m_grid_y[i].end());
}
for (int i = 0; i < m_anchor_idx.size(); i++) {
for (int j = 0; j < m_anchor_idx[i].size(); j++) {
m_dets_anchor_idx.emplace_back(m_anchor_idx[i][j]);
}
}
}
OrienMaskYOLOPostProcess::~OrienMaskYOLOPostProcess() {
m_nHs.clear();
std::vector<int>().swap(m_nHs);
m_nWs.clear();
std::vector<int>().swap(m_nWs);
m_pixel_anchors.clear();
std::vector<std::vector<int>>().swap(m_pixel_anchors);
m_grid_y.clear();
std::vector<std::vector<int>>().swap(m_anchor_idx);
m_base_xy.clear();
std::vector<std::vector<float>>().swap(m_base_xy);
m_anchor_mask.clear();
std::vector<std::vector<int>>().swap(m_anchor_mask);
}
void OrienMaskYOLOPostProcess::get_orien_grid(std::vector<std::vector<float>> dets_orien,
std::vector<std::vector<float>> &result_dets_orien) {
for (int i = 0; i < dets_orien.size(); i++) {
int length = dets_orien[i].size();
for (int j = 0; j < length; j++) {
result_dets_orien[i][j] =
dets_orien[i][j] * m_grid_anchors[i][j < length / 2 ? 0 : 1] / 2 + m_base_xy[i][j];
}
}
}
void OrienMaskYOLOPostProcess::transpose_dim1_dim2(int dim_0, int dim_2, int dim_3, std::vector<float> bbox_batch,
std::vector<float> &swap_bbox_batch) {
int elements_count = bbox_batch.size();
int dim_1 = elements_count / (dim_0 * dim_2 * dim_3);
swap_bbox_batch.resize(elements_count);
for (int i = 0; i < dim_0; i++) {
for (int j = 0; j < dim_2; j++) {
#pragma omp parallel for num_threads(omp_get_max_threads())
for (int k = 0; k < dim_1; k++) {
for (int t = 0; t < dim_3; t++) {
swap_bbox_batch[i * dim_2 * dim_1 * dim_3 + j * dim_1 * dim_3 + k * dim_3 + t] = bbox_batch[
i * dim_1 * dim_2 * dim_3 + j * dim_2 + k * dim_2 * dim_3 + t];
}
}
}
}
}
void OrienMaskYOLOPostProcess::transpose_dim2_dim3(int dim_0, int dim_2, int dim_3, std::vector<float> bbox_batch,
std::vector<float> &swap_bbox_batch) {
int elements_count = bbox_batch.size();
int dim_1 = elements_count / (dim_0 * dim_2 * dim_3);
swap_bbox_batch.resize(elements_count);
for (int i = 0; i < dim_0; i++) {
for (int j = 0; j < dim_2; j++) {
#pragma omp parallel for num_threads(omp_get_max_threads())
for (int k = 0; k < dim_3; k++) {
for (int t = 0; t < dim_1; t++) {
swap_bbox_batch[i * dim_2 * dim_3 * dim_1 + j * dim_3 * dim_1 + k * dim_1 + t] = bbox_batch[
i * dim_1 * dim_2 * dim_3 + j * dim_3 * dim_1 + k + t * dim_2];
}
}
}
}
}
void OrienMaskYOLOPostProcess::qsort_descent_inplace(std::vector<BBox> &objects, int left, int right) {
int i = left;
int j = right;
float p = objects[(left + right) / 2].conf;
while (i <= j) {
while (objects[i].conf > p)
i++;
while (objects[j].conf < p)
j--;
if (i <= j) {
// swap
std::swap(objects[i], objects[j]);
i++;
j--;
}
}
#pragma omp parallel sections
{
#pragma omp section
{
if (left < j) qsort_descent_inplace(objects, left, j);
}
#pragma omp section
{
if (i < right) qsort_descent_inplace(objects, i, right);
}
}
}
void OrienMaskYOLOPostProcess::qsort_descent_inplace(std::vector<BBox> &objects) {
if (objects.empty())
return;
qsort_descent_inplace(objects, 0, objects.size() - 1);
}
float OrienMaskYOLOPostProcess::intersection_area(BBox a, BBox b) {
float x1 = a.rect.x;
float y1 = a.rect.y;
float x2 = a.rect.x + a.rect.width;
float y2 = a.rect.y + a.rect.height;
float x3 = b.rect.x;
float y3 = b.rect.y;
float x4 = b.rect.x + b.rect.width;
float y4 = b.rect.y + b.rect.height;
float n1 = std::max(std::min(x1, x2), std::min(x3, x4)); // left edge of the overlap region
float m1 = std::max(std::min(y1, y2), std::min(y3, y4)); // top edge of the overlap region
float n2 = std::min(std::max(x1, x2), std::max(x3, x4)); // right edge of the overlap region
float m2 = std::min(std::max(y1, y2), std::max(y3, y4)); // bottom edge of the overlap region
if (n2 > n1 && m2 > m1) {
return (n2 - n1) * (m2 - m1);
} else {
return 0.0f;
}
}
void OrienMaskYOLOPostProcess::nms_sorted_bboxes(const std::vector<BBox> &objects, std::vector<int> &picked,
float nms_threshold, bool agnostic) {
picked.clear();
const int n = objects.size();
std::vector<float> areas(n);
#pragma omp parallel for num_threads(omp_get_max_threads())
for (int i = 0; i < n; i++) {
areas[i] = objects[i].rect.width * objects[i].rect.height;
}
for (int i = 0; i < n; i++) {
const BBox &a = objects[i];
int keep = 1;
for (int j = 0; j < (int) picked.size(); j++) {
const BBox &b = objects[picked[j]];
if (!agnostic && a.label != b.label)
continue;
// intersection over union
float inter_area = intersection_area(a, b);
float union_area = areas[i] + areas[picked[j]] - inter_area;
float IoU = inter_area / union_area;
if (IoU > nms_threshold)
keep = 0;
}
if (keep)
picked.emplace_back(i);
}
}
void OrienMaskYOLOPostProcess::multi_class_nms(std::vector<BBox> result_dets_bbox,
std::vector<int> result_dets_anchor_idx,
std::vector<std::vector<float>> result_dets_orien,
std::vector<std::vector<float>> &box_dets,
std::vector<std::vector<char>> &mask_masks,
std::vector<int> &cls_cats) {
// float m_max_coordinate=1.5;
// for(int i=0;i<result_dets_bbox.size();i++){
// result_dets_bbox[i].rect.x=result_dets_bbox[i].rect.x+(result_dets_cls[i]*(m_max_coordinate+0.5));
// result_dets_bbox[i].rect.y=result_dets_bbox[i].rect.y+(result_dets_cls[i]*(m_max_coordinate+0.5));
//
// }
// sort all proposals by score from highest to lowest
qsort_descent_inplace(result_dets_bbox);
// apply nms with nms_threshold
std::vector<int> keep_idx;
nms_sorted_bboxes(result_dets_bbox, keep_idx, 0.5); // NMS adapted from ncnn's example code
if (keep_idx.size() > m_nms_post) {
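// TODO: truncate keep_idx to m_nms_post; left unimplemented in this port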
}
std::vector<float> x_centers, y_centers, det_width, det_height;
std::vector<float> mask_w, mask_h;
for (int i = 0; i < keep_idx.size(); i++) {
int real_index = result_dets_bbox[keep_idx[i]].index;
int anchor_idx = result_dets_anchor_idx[real_index];
cls_cats.emplace_back(result_dets_bbox[keep_idx[i]].label);
box_dets.push_back({result_dets_bbox[keep_idx[i]].rect.x,
result_dets_bbox[keep_idx[i]].rect.y,
result_dets_bbox[keep_idx[i]].rect.width,
result_dets_bbox[keep_idx[i]].rect.height,
result_dets_bbox[keep_idx[i]].conf});
float x_centers_value = m_grid_sizes[anchor_idx][0] * result_dets_bbox[keep_idx[i]].rect.x;
x_centers.emplace_back(x_centers_value);
float y_centers_value = m_grid_sizes[anchor_idx][1] * result_dets_bbox[keep_idx[i]].rect.y;
y_centers.emplace_back(y_centers_value);
float det_width_value = result_dets_bbox[keep_idx[i]].rect.width;
det_width.emplace_back(det_width_value);
float det_height_value = result_dets_bbox[keep_idx[i]].rect.height;
det_height.emplace_back(det_height_value);
float mask_w_value = m_orien_thresh * det_width_value * m_grid_sizes[anchor_idx][0];
mask_w.emplace_back(mask_w_value);
float mask_h_value = m_orien_thresh * det_height_value * m_grid_sizes[anchor_idx][1];
mask_h.emplace_back(mask_h_value);
std::vector<char> item_mask;
for (int j = 0; j < result_dets_orien[anchor_idx].size() / 2; j++) {
char left_mask = abs(result_dets_orien[anchor_idx][j] - x_centers_value) < mask_w_value ? 1 : 0;
char right_mask =
abs(result_dets_orien[anchor_idx][j + m_image_h * m_image_w] - y_centers_value) < mask_h_value ? 1
: 0;
item_mask.emplace_back(left_mask & right_mask);
}
mask_masks.emplace_back(item_mask);
}
}
void OrienMaskYOLOPostProcess::apply(std::vector<float> vec_result_0,
std::vector<float> vec_result_1,
std::vector<float> vec_result_2,
std::vector<float> vec_result_3,
std::vector<float> vec_result_4,
std::vector<float> vec_result_5,
std::vector<std::vector<float>> &box_dets,
std::vector<std::vector<char>> &mask,
std::vector<int> &cls_cats
) {
if (!vec_result_0.size()) {
return;
}
std::vector<std::vector<float>> pred_bbox_batch = {vec_result_0, vec_result_1, vec_result_2};
std::vector<std::vector<float>> pred_orien_batch = {vec_result_3, vec_result_4, vec_result_5};
std::vector<std::vector<float>> vec_dets_coord;
std::vector<std::vector<float>> vec_dets_conf;
std::vector<std::vector<float>> pred_bbox;
std::vector<std::vector<float>> pred_orien;
std::vector<std::vector<float>> dets_orien(m_base_xy.size());
for (int i = 0; i < m_scales; i++) {
std::vector<std::vector<float>> anchors;
for (int j = 0; j < m_anchor_mask[i].size(); j++) {
anchors.emplace_back(m_normalized_anchors[m_anchor_mask[i][j]]);
}
std::vector<std::vector<float>> vec_pred_coord;
std::vector<std::vector<float>> vec_pred_conf;
std::vector<float> swap_bbox_batch_1_2, swap_bbox_batch_2_3;//view(nA,-1,nH,nW)
transpose_dim1_dim2(m_num_anchors[i], m_nHs[i], m_nWs[i], pred_bbox_batch[i],
swap_bbox_batch_1_2); // transpose(1, 2).contiguous()
transpose_dim2_dim3(m_num_anchors[i], m_nHs[i], m_nWs[i], swap_bbox_batch_1_2,
swap_bbox_batch_2_3); // transpose(2, 3).contiguous()
get_boxes(swap_bbox_batch_2_3, m_num_anchors[i], m_nHs[i], m_nWs[i], anchors, m_grid_x[i], m_grid_y[i],
vec_pred_coord, vec_pred_conf);
vec_dets_coord.insert(vec_dets_coord.end(), vec_pred_coord.begin(), vec_pred_coord.end());
vec_dets_conf.insert(vec_dets_conf.end(), vec_pred_conf.begin(), vec_pred_conf.end());
for (int j = 0; j < m_num_anchors[i]; j++) {
dets_orien[m_anchor_mask[i][j]].insert(dets_orien[m_anchor_mask[i][j]].end(),
pred_orien_batch[i].begin() + j * m_image_h * m_image_w * 2,
pred_orien_batch[i].begin() +
(j + 1) * m_image_h * m_image_w * 2); // verified: matches the Python output
}
}
std::vector<std::vector<float>> result_dets_orien(dets_orien);
get_orien_grid(dets_orien, result_dets_orien); // 9 anchors, each holding 2*544*544 values
std::vector<int> selected_inds, result_dets_cls;
std::vector<float> result_dets_conf;
for (int i = 0; i < vec_dets_conf.size(); i++) {
for (int j = 0; j < vec_dets_conf[i].size(); j++) {
if (vec_dets_conf[i][j] > m_conf_thresh) {
selected_inds.emplace_back(i);
result_dets_cls.emplace_back(j);
result_dets_conf.emplace_back(vec_dets_conf[i][j]);
}
}
}
if (selected_inds.size() > m_nms_pre) {
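// TODO: keep only the top m_nms_pre detections by confidence; left unimplemented in this port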
}
std::vector<Rect> result_pred_coord;
std::vector<int> result_dets_anchor_idx;
std::vector<BBox> result_dets_bbox;
for (int index = 0; index < selected_inds.size(); index++) {
result_dets_anchor_idx.emplace_back(m_dets_anchor_idx[selected_inds[index]]);
result_dets_bbox.emplace_back(BBox(result_dets_cls[index],
result_dets_conf[index],
index,
Rect(vec_dets_coord[selected_inds[index]][0],
vec_dets_coord[selected_inds[index]][1],
vec_dets_coord[selected_inds[index]][2],
vec_dets_coord[selected_inds[index]][3])
));
}
multi_class_nms(result_dets_bbox, result_dets_anchor_idx, result_dets_orien, box_dets, mask, cls_cats);
}
float OrienMaskYOLOPostProcess::sigmoid(float num) {
return 1 / (1 + exp(-num));
}
void OrienMaskYOLOPostProcess::get_boxes(std::vector<float> vec_pred_bbox,
int dim_0,
int item_nHs,
int item_nWs,
std::vector<std::vector<float>> anchors,
std::vector<int> item_grid_x,
std::vector<int> item_grid_y,
std::vector<std::vector<float>> &vec_pred_coord,
std::vector<std::vector<float>> &vec_pred_conf) {
std::vector<std::vector<float>> vec_loc_pred_coord;
std::vector<float> vec_pred_obj;
std::vector<std::vector<float>> vec_pred_cls;
std::vector<float> item_pred_cls;
int length = vec_pred_bbox.size();// 3 17 17 85
int stride = length / (dim_0 * item_nWs * item_nHs);
for (int i = 0; i < vec_pred_bbox.size(); i += stride) {
std::vector<float> item_pred_coord;
std::vector<float> item_pred_obj;
item_pred_coord.emplace_back(vec_pred_bbox[i]);
item_pred_coord.emplace_back(vec_pred_bbox[i + 1]);
item_pred_coord.emplace_back(vec_pred_bbox[i + 2]);
item_pred_coord.emplace_back(vec_pred_bbox[i + 3]);
vec_pred_obj.emplace_back(sigmoid(vec_pred_bbox[i + 4]));
for (int j = 5; j < stride; j++) {
item_pred_cls.emplace_back(sigmoid(vec_pred_bbox[i + j]));
if (item_pred_cls.size() == m_num_classes) { //view(-1,80)
vec_pred_cls.emplace_back(item_pred_cls);
item_pred_cls.clear();
std::vector<float>().swap(item_pred_cls);
}
}
vec_loc_pred_coord.emplace_back(item_pred_coord);
}
for (int i = 0; i < vec_pred_cls.size(); i++) {
std::vector<float> item_pred_conf;
for (int j = 0; j < vec_pred_cls[i].size(); j++) {
item_pred_conf.emplace_back(vec_pred_cls[i][j] * vec_pred_obj[i]);
}
vec_pred_conf.emplace_back(item_pred_conf);
}
for (int i = 0; i < vec_loc_pred_coord.size(); i++) {
float pred_coord_0 = (sigmoid(vec_loc_pred_coord[i][0]) + item_grid_x[i]) / item_nWs;
float pred_coord_1 = (sigmoid(vec_loc_pred_coord[i][1]) + item_grid_y[i]) / item_nHs;
float pred_coord_2 = exp(vec_loc_pred_coord[i][2]) * anchors[i / (item_nWs * item_nHs)][0];
float pred_coord_3 = exp(vec_loc_pred_coord[i][3]) * anchors[i / (item_nWs * item_nHs)][1];
vec_pred_coord.push_back({pred_coord_0, pred_coord_1, pred_coord_2, pred_coord_3});
}
}
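The part of multi_class_nms above that deserves emphasis is the mask recovery: a pixel joins an instance's mask when the orientation vector predicted at that pixel points back at the box center within a tolerance proportional to the box size — no separate mask head is involved. A hedged numpy sketch of that test for a single detection (illustrative shapes and values, not repo code):
import numpy as np
H = W = 544
orien_x = np.random.rand(H, W) * 17  # grid-scaled orientation field, x component
orien_y = np.random.rand(H, W) * 17  # y component
cx, cy, bw, bh = 8.5, 8.5, 4.0, 3.0  # box center and size in 17x17 grid units
orien_thresh = 0.3
mask = (np.abs(orien_x - cx) < orien_thresh * bw) & (np.abs(orien_y - cy) < orien_thresh * bh)
print(mask.shape, int(mask.sum()))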
Test image:
2) MNN version — only main.cpp needs to change.
ubuntu@ubuntu:~/MNN/build$ ./MNNConvert -f ONNX --modelFile /home/ubuntu/AndroidStudioProjects/Orienmask_project/model/orienmask_yolo_sim.onnx --MNNModel /home/ubuntu/AndroidStudioProjects/Orienmask_project/model/orienmask_yolo_sim.mnn --bizCode MNN
Start to Convert Other Model Format To MNN Model...
[12:57:23] /home/ubuntu/MNN/tools/converter/source/onnx/onnxConverter.cpp:40: ONNX Model ir version: 6
Start to Optimize the MNN Net...
inputTensors : [ input, ]
outputTensors: [ 770, 774, 778, 838, 839, 840, 845, 850, 855, ]
Converted Success!
Model structure:
CMakeLists.txt
cmake_minimum_required(VERSION 3.16)
project(Orienmask_project)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp ")
set(CMAKE_CXX_STANDARD 11)
include_directories(${CMAKE_SOURCE_DIR}/include)
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
# import MNN
add_library(libmnn SHARED IMPORTED)
set_target_properties(libmnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libMNN.so)
add_executable(Orienmask_project main.cpp orienmask_yolo_postprocess.cpp orienmask_yolo_postprocess.h)
target_link_libraries(Orienmask_project ${OpenCV_LIBS} libmnn)
main.cpp
#include <opencv2/highgui/highgui.hpp>
#include<MNN/Interpreter.hpp>
#include<MNN/ImageProcess.hpp>
#include "orienmask_yolo_postprocess.h"
#include <iostream>
#include <numeric>
#include<opencv2/opencv.hpp>
void get_result_data(float* output_ptr,int total_elementSize, std::vector<float> &vec_result) {
for (int i = 0; i < total_elementSize; i++) {
vec_result.push_back(output_ptr[i]);
}
}
void InferenceVisualizer(std::vector<std::vector<float>> bbox_dets,
std::vector<std::vector<char>> mask_masks,
std::vector<int> cls_cats,
cv::Mat img_dst,
bool with_mask,
float confidence,
std::vector<int> pad_info,
std::vector<std::string> classes,
int img_w,
int img_h,
std::vector<cv::Scalar> PALETTE
) {
int left = pad_info[0];
int right = pad_info[1];
int top = pad_info[2];
int down = pad_info[3];
int h = pad_info[4];
int w = pad_info[5];
int nh = h - top - down;
int nw = w - left - right;
std::vector<std::vector<int>> mask_dets;
std::vector<cv::Scalar> color;
int length = bbox_dets.size();
for (int i = 0; i < length; i++) {
float conf = bbox_dets[i][4];
if (conf > confidence) {
int colors_idx = length * 5 + rand() % PALETTE.size();
int colors_real = colors_idx % PALETTE.size();
if (with_mask) {
for (int y = 0; y < img_dst.rows; y++) {
uchar *p = img_dst.ptr(y);
for (int x = 0; x < img_dst.cols; x++) {
if (mask_masks[i][y * img_dst.cols + x]) {
p[0] = cv::saturate_cast<uchar>(p[0] * 0.5 + PALETTE[colors_real][0] * 0.5);
p[1] = cv::saturate_cast<uchar>(p[1] * 0.5 + PALETTE[colors_real][1] * 0.5);
p[2] = cv::saturate_cast<uchar>(p[2] * 0.5 + PALETTE[colors_real][2] * 0.5);
}
p += 3;
}
}
}
float bx = (bbox_dets[i][0] * w - left) / nw;
float by = (bbox_dets[i][1] * h - top) / nh;
float bw = bbox_dets[i][2] * w / nw;
float bh = bbox_dets[i][3] * h / nh;
int bx1 = ceil((bx - bw / 2) * w); // changed: use the padded size w/h instead of the real image size img_w/img_h, so the original upsampling function is not needed — one final image resize suffices
int by1 = ceil((by - bh / 2) * h); // to compare against the Python output, use img_w/img_h here instead
int bx2 = ceil((bx + bw / 2) * w);
int by2 = ceil((by + bh / 2) * h);
cv::rectangle(img_dst, cv::Point(bx1, by1), cv::Point(bx2, by2), PALETTE[colors_real]); // two-corner form
char text[256];
sprintf(text, "%s %.1f%%", classes[cls_cats[i]].c_str(), conf * 100);
int baseLine = 0;
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
int x = bx1;
int y = by1 - label_size.height - baseLine;
if (y < 0)
y = 0;
if (x + label_size.width > img_dst.cols)
x = img_dst.cols - label_size.width;
cv::rectangle(img_dst, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
PALETTE[colors_real], -1);
cv::putText(img_dst, text, cv::Point(x, y + label_size.height),
cv::FONT_HERSHEY_DUPLEX, 0.4, cv::Scalar(255, 255, 255), 1, cv::LINE_AA);
}
}
cv::resize(img_dst, img_dst, cv::Size(img_w, img_h), 0, 0, cv::INTER_LINEAR); // replaces the resampling step removed from the original post-processing
cv::imshow("demo", img_dst);
cv::imwrite("mnn.jpg", img_dst);
cv::waitKey(0);
}
int main() {
std::vector<std::string> classes = {
"person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
"boat", "traffic-light", "fire-hydrant", "stop-sign", "parking-meter", "bench",
"bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
"giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
"snowboard", "sports-ball", "kite", "baseball-bat", "baseball-glove", "skateboard",
"surfboard", "tennis-racket", "bottle", "wine-glass", "cup", "fork", "knife",
"spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
"hot-dog", "pizza", "donut", "cake", "chair", "sofa", "potted-plant", "bed",
"dining-table", "toilet", "tv-monitor", "laptop", "mouse", "remote", "keyboard",
"cell-phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book",
"clock", "vase", "scissors", "teddy-bear", "hair-drier", "toothbrush"
};
std::vector<std::vector<int>> vec_grid_size = {{17, 17},
{34, 34},
{68, 68}};
std::vector<std::vector<int>> vec_anchors = {{12, 16},
{19, 36},
{40, 28},
{36, 75},
{76, 55},
{72, 146},
{142, 110},
{192, 243},
{459, 401}};
std::vector<std::vector<int>> vec_anchor_mask = {{6, 7, 8},
{3, 4, 5},
{0, 1, 2}};
std::vector<cv::Scalar> PALETTE = {
cv::Scalar(233, 30, 99),
cv::Scalar(255, 235, 59),
cv::Scalar(255, 193, 7),
cv::Scalar(255, 152, 0),
cv::Scalar(255, 87, 34),
cv::Scalar(121, 85, 72),
cv::Scalar(158, 158, 158),
cv::Scalar(96, 125, 139),
cv::Scalar(156, 39, 176),
cv::Scalar(103, 58, 183),
cv::Scalar(63, 81, 181),
cv::Scalar(33, 150, 243),
cv::Scalar(3, 169, 244),
cv::Scalar(0, 188, 212),
cv::Scalar(0, 150, 136),
cv::Scalar(76, 175, 80),
cv::Scalar(139, 195, 74),
cv::Scalar(205, 220, 57),
cv::Scalar(244, 67, 54)
};
int num_classes = 80;
float conf_thresh = 0.05;
int nms_pre = 400;
int nms_post = 100;
float orien_thresh = 0.3;
int target_size = 544;
std::vector<int> vec_image_size = {target_size, target_size};
std::vector<int> pad_info={0,0,0,0,target_size, target_size};
OrienMaskYOLOPostProcess *item_Orien = new OrienMaskYOLOPostProcess(vec_grid_size, vec_image_size, vec_anchors,
vec_anchor_mask, num_classes,
conf_thresh, nms_pre,
nms_post, orien_thresh);
cv::Mat img_src = cv::imread("/home/ubuntu/OrienMask/assets/000000163126.jpg");
int img_w=img_src.cols;
int img_h=img_src.rows;
cv::Mat img_dst;
cv::resize(img_src, img_dst, cv::Size(target_size, target_size),0,0,cv::INTER_LINEAR);
std::vector<float> meanVals = {0.0f, 0.0f, 0.0f};
std::vector<float> normVals = {1.0f / 255.f, 1.0f / 255.f, 1.0f / 255.f};
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile("../model/orienmask_yolo_sim.mnn")); // create the interpreter
std::cout << "Interpreter created" << std::endl;
MNN::ScheduleConfig config;
config.numThread = 8;
config.type = MNN_FORWARD_CPU;
auto session = net->createSession(config); // create the session
std::cout << "session created" << std::endl;
auto inTensor = net->getSessionInput(session, NULL);
auto outTensor = net->getSessionOutput(session, NULL);
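// MNN::Tensor::CAFFE below selects NCHW layout, matching the ONNX model's input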
auto _Tensor = MNN::Tensor::create<float>({1,3,target_size,target_size}, NULL, MNN::Tensor::CAFFE);
if(_Tensor->elementSize()!=3*target_size*target_size)
{
std::cout<<_Tensor->elementSize()<<" "<<img_dst.channels()*img_dst.cols*img_dst.rows<<std::endl;
std::cout<<"input shape not equal image shape"<<std::endl;
return -1;
}
cv::Mat img_rgb;
cv::cvtColor(img_dst, img_rgb, cv::COLOR_BGR2RGB); // convert to RGB to match the ncnn version's PIXEL_BGR2RGB (assumption: the model expects RGB input)
std::vector<cv::Mat> rgbChannels(3);
cv::split(img_rgb, rgbChannels);
for (auto i = 0; i < rgbChannels.size(); i++) {
rgbChannels[i].convertTo(rgbChannels[i], CV_32FC1, normVals[i], meanVals[i]);
for(int j=0;j<rgbChannels[i].rows;j++) {
for (int k = 0; k < rgbChannels[i].cols; k++) {
_Tensor->host<float>()[i*target_size*target_size+j*target_size+k] =rgbChannels[i].at<float>(j, k);
//printf("%f ",rgbChannels[i].at<float>(j, k));//用作比对数据 可以换成高级写法
}
}
}
inTensor->copyFromHostTensor(_Tensor);
// run inference
net->runSession(session);
std::vector<float> vec_result_0;
std::vector<float> vec_result_1;
std::vector<float> vec_result_2;
std::vector<float> vec_result_3;
std::vector<float> vec_result_4;
std::vector<float> vec_result_5;
auto output0= net->getSessionOutput(session, "770");
MNN::Tensor result0(output0, output0->getDimensionType());
output0->copyToHostTensor(&result0);
float* output_ptr0 = result0.host<float>();
std::cout<<"wh = "<<result0.height()<<" "<<result0.width()<<" "<<result0.channel()<<" "<<result0.elementSize()<<std::endl;
get_result_data(output_ptr0, result0.elementSize(),vec_result_0);
auto output1= net->getSessionOutput(session, "774");
MNN::Tensor result1(output1, output1->getDimensionType());
output1->copyToHostTensor(&result1);
float* output_ptr1 = result1.host<float>();
std::cout<<"wh = "<<result1.height()<<" "<<result1.width()<<" "<<result1.channel()<<" "<<result1.elementSize()<<std::endl;
get_result_data(output_ptr1, result1.elementSize(),vec_result_1);
auto output2= net->getSessionOutput(session, "778");
MNN::Tensor result2(output2, output2->getDimensionType());
output2->copyToHostTensor(&result2);
float* output_ptr2 = result2.host<float>();
std::cout<<"wh = "<<result2.height()<<" "<<result2.width()<<" "<<result2.channel()<<" "<<result2.elementSize()<<std::endl;
get_result_data(output_ptr2, result2.elementSize(),vec_result_2);
auto output3= net->getSessionOutput(session, "845");
MNN::Tensor result3(output3, output3->getDimensionType());
output3->copyToHostTensor(&result3);
float* output_ptr3 = result3.host<float>();
std::cout<<"wh = "<<result3.height()<<" "<<result3.width()<<" "<<result3.channel()<<" "<<result3.elementSize()<<std::endl;
get_result_data(output_ptr3, result3.elementSize(),vec_result_3);
auto output4= net->getSessionOutput(session, "850");
MNN::Tensor result4(output4, output4->getDimensionType());
output4->copyToHostTensor(&result4);
float* output_ptr4 = result4.host<float>();
std::cout<<"wh = "<<result4.height()<<" "<<result4.width()<<" "<<result4.channel()<<" "<<result4.elementSize()<<std::endl;
get_result_data(output_ptr4, result4.elementSize(),vec_result_4);
auto output5= net->getSessionOutput(session, "855");
MNN::Tensor result5(output5, output5->getDimensionType());
output5->copyToHostTensor(&result5);
float* output_ptr5 = result5.host<float>();
std::cout<<"wh = "<<result5.height()<<" "<<result5.width()<<" "<<result5.channel()<<" "<<result5.elementSize()<<std::endl;
get_result_data(output_ptr5, result5.elementSize(),vec_result_5);
// the post-processing avoids depending on ncnn as much as possible, since it is reused for MNN
std::vector<std::vector<char>> mask_masks;
std::vector<std::vector<float>> bbox_dets;
std::vector<int> cls_cats;
item_Orien->apply(vec_result_0, vec_result_1, vec_result_2,
vec_result_3,
vec_result_4, vec_result_5, bbox_dets, mask_masks, cls_cats);
bool with_mask = true;
float confidence = 0.3f;
InferenceVisualizer(bbox_dets, mask_masks, cls_cats, img_dst, with_mask, confidence, pad_info, classes,img_w,img_h,
PALETTE);
delete item_Orien;
return 0;
}
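The triple loop that fills _Tensor above writes the image into NCHW order by hand. A standalone numpy check of that index math (illustration only):
import numpy as np
C, H, W = 3, 4, 4
img = np.arange(C * H * W, dtype=np.float32).reshape(H, W, C)  # HWC, like a cv::Mat
chw = img.transpose(2, 0, 1).ravel()                           # NCHW flattening
for i in range(C):
    for j in range(H):
        for k in range(W):
            assert chw[i * H * W + j * W + k] == img[j, k, i]
print("index math OK")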
The test results are identical.
Trying OrienMask's C++ MNN build on an mmdetection demo image:
Timing for the image above so far — ncnn cost:
/home/ubuntu/AndroidStudioProjects/Orienmask_project/cmake-build-debug/Orienmask_project
result_0 shape: 3 1 255 17 17
result_1 shape: 3 1 255 34 34
result_2 shape: 3 1 255 68 68
result_6 shape: 3 1 6 544 544
result_7 shape: 3 1 6 544 544
result_8 shape: 3 1 6 544 544
t5 used time = 1127ms
ONNX cost below; neither measurement includes model-loading time.
execute onnx infer
[329, 58, 1018, 730] 0.9944660067558289 person
[128, 54, 414, 786] 0.8737048506736755 person
793.990559000008 ms
2022-07-24: After optimization, the C++ code combined with a D435 depth camera runs at around 680 ms (that code is not public).
2022-07-30: A second optimization pass brings it down to roughly 460 ms.
I also trained for 100 epochs on cups only, using a dataset filtered down to the cup images of COCO 2017.
Inference time on an i5 + RTX 3060 12G desktop, 544-pixel input:
ubuntu@ubuntu:~/OrienMask$ CUDA_VISIBLE_DEVICES=0 python3 infer.py -c orienmask_yolo_coco_544_anchor4_fpn_plus_infer -w /home/ubuntu/OrienMask/checkpoints/OrienMaskAnchor4FPNPlus_0910_100749/epoch100.pth -i /home/ubuntu/OrienMask/coco/val2017/JPEGImages/000000078426.jpg -v -o outputs
0%| | 0/1 [00:00<?, ?it/s][271, 75, 363, 158] 0.6327356100082397 cup
100%|█████████████████████████████████████████████| 1/1 [00:01<00:00, 1.42s/it]
The inference takes 1.4259593505859376 seconds.
The average inference time is 1425.96 ms (0.70 fps)
Load data: 4.02ms (248.74fps)
Forward & Postprocess: 1399.56ms (0.71fps)
Visualize: 21.15ms (47.28fps)
Addendum: to change the training input size, edit /home/ubuntu/OrienMask/config/base.py, keeping the image size divisible by the three strides:
ubuntu@ubuntu:~$ python3
Python 3.8.10 (default, Jun 22 2022, 20:18:18)
[GCC 9.4.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> 544/17
32.0
>>> 544/34
16.0
>>> 544/68
8.0
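By the same relationship, a 320×320 input gives grids of 10, 20, and 40, which is why grid_size becomes [[10, 10], [20, 20], [40, 40]] in the loss and post-process blocks of the modified config:
image_size = 320
for stride in (32, 16, 8):
    print(image_size // stride)  # 10, 20, 40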
Here is the modified base.py:
import copy
MEAN = [123.675, 116.280, 103.530]
STD = [58.395, 57.120, 57.375]
ANCHORS_MASK = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
ANCHORS_YOLOV3 = [
[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]
]
ANCHORS_YOLOV4 = [
[12, 16], [19, 36], [40, 28],
[36, 75], [76, 55], [72, 146],
[142, 110], [192, 243], [459, 401]
]
def construct_config(config, update=None, pop=None):
    """Construct config from a base config

    If a key of `update` matches `config` and both of their values are `dict`,
    then the update process will be iteratively executed. Otherwise, it is the
    same as built-in `update` function of `dict` type.

    The items in `pop` are iterative keys joined by periods. For example,
    `top_key.sub_key` means popping config['top_key']['sub_key']. If there is no
    period included, then the pop process will be the same as built-in `pop`
    function of `dict` type.

    Args:
        config (dict): the base config
        update (dict): update on base config
        pop (list): pop keys out of base config
    """
    new_config = copy.deepcopy(config)
    if update is not None:
        for key, value in update.items():
            if isinstance(value, dict) and isinstance(new_config.get(key), dict):
                new_config[key] = construct_config(new_config[key], update=value)
            else:
                new_config[key] = value
    if pop is not None:
        for key in pop:
            sub_keys = key.split('.')
            sub_config = new_config
            for sub_key in sub_keys[:-1]:
                sub_config = sub_config[sub_key]
            sub_config.pop(sub_keys[-1])
    return new_config
# train configuration template
template_train = dict(
name=None, # used to create checkpoint sub-folder
n_gpu=None, # number of gpu devices to train
epochs=None, # total epochs over the train dataset
cudnn_benchmark=None, # to speed up convolution if the input size is fixed
accumulate=None, # accumulate gradients over multiple mini-batches
monitor=None, # the criterion for saving the best model
monitor_mode=None, # three options: 'min', 'max', 'off'
log_dir=None, # checkpoints base directory
val_freq=None, # validation interval (epochs)
save_freq=None, # interval (epochs) to save training checkpoints
log_freq=None, # logging interval (batches) on tensorboard
seed=None, # random seed
trainer=None, # trainer type
model=None,
train_loader=None,
val_loader=None,
val_gt_file=None,
postprocess=None,
loss=None,
optimizer=None,
lr_scheduler=None
)
template_test = dict(
n_gpu=None,
cudnn_benchmark=None,
tester=None,
model=None,
test_loader=None,
postprocess=None,
gt_file=None
)
template_infer = dict(
n_gpu=None,
cudnn_benchmark=True,
model=None,
transform=None,
postprocess=None,
visualizer=None
)
# model configurations
orienmask_yolo_coco = dict(
type="OrienMaskYOLO",
num_anchors=3,
num_classes=80,
pretrained="/home/ubuntu/OrienMask/checkpoints/pretrained/pretrained_darknet53.pth",
freeze_backbone=False,
backbone_batchnorm_eval=True  # assumption: this value and the dict's closing lines were garbled in the source
)
orienmask_yolo_fpn_plus_coco = construct_config(  # assumption: the assignment name was garbled in the source
orienmask_yolo_coco,
update=dict(type="OrienMaskYOLOFPNPlus")
)
# dataset configurations
coco_train_dataset = dict(
type="COCODataset",
list_file="/home/ubuntu/OrienMask/coco/list/coco_train.txt",
image_dir="/home/ubuntu/OrienMask/coco/train2017",
anno_file="/home/ubuntu/OrienMask/coco/annotations/orienmask_coco_train.json",
with_mask=True,
with_info=False
)
coco_val_dataset = dict(
type="COCODataset",
list_file="/home/ubuntu/OrienMask/coco/list/coco_val.txt",
image_dir="/home/ubuntu/OrienMask/coco/val2017",
anno_file="/home/ubuntu/OrienMask/coco/annotations/orienmask_coco_val.json",
with_mask=True,
with_info=True
)
# transform configurations
transform_train_544 = dict(
type="COCOTransform",
pipeline=[
dict(type="ColorJitter", brightness=0.2, contrast=0.5, saturation=0.5, hue=0.1),
dict(type="RandomCrop", p=0.5, image_min_iou=0.64, bbox_min_iou=0.64),
dict(type="Resize", size=(320,320), pad_needed=True, warp_p=0.25, jitter=0.3,
random_place=True, pad_p=0.75, pad_ratio=0.75, pad_value=MEAN),
dict(type="RandomHorizontalFlip", p=0.5),
dict(type="ToTensor"),
dict(type="Normalize", mean=(0, 0, 0), std=(255, 255, 255))
]
)
transform_val_544 = dict(
type="COCOTransform",
pipeline=[
dict(type="Resize", size=(320,320), pad_needed=False, warp_p=0., jitter=0.,
random_place=False, pad_p=0., pad_ratio=0., pad_value=MEAN),
dict(type="ToTensor"),
dict(type="Normalize", mean=(0, 0, 0), std=(255, 255, 255))
]
)
transform_infer_544 = dict(
type="FastCOCOTransform",
pipeline=[
dict(type="Resize", size=(320, 320), interpolation='bilinear', align_corners=False),
dict(type="Normalize", mean=(0, 0, 0), std=(255, 255, 255))
]
)
# dataloader configurations
coco_544_train_loader = dict(
type="DataLoader",
dataset=coco_train_dataset,
transform=transform_train_544,
batch_size=1,
num_workers=4,
shuffle=True,
pin_memory=False,
collate=dict(type="collate")
)
coco_544_val_loader = dict(
type="DataLoader",
dataset=coco_val_dataset,
transform=transform_val_544,
batch_size=1,
num_workers=4,
shuffle=False,
pin_memory=False,
collate=dict(type="collate")
)
# ground truth files for coco evaluation
# loss configurations
orienmask_yolo_coco_544_loss = dict(  # assumption: name reconstructed from its use below
type="OrienMaskYOLOMultiScaleLoss",
grid_size=[[10, 10], [20, 20], [40, 40]],
image_size=[320, 320],
anchors=ANCHORS_YOLOV3,
anchor_mask=ANCHORS_MASK,
#num_classes=80,
num_classes=80,
center_region=0.6,
valid_region=0.6,
label_smooth=False,
obj_ignore_threshold=0.7,
weight=[1, 1, 1, 1, 1, 20, 20],
scales_weight=[1, 1, 1]
)
orienmask_yolo_coco_544_anchor4_loss = construct_config(
orienmask_yolo_coco_544_loss,
update=dict(anchors=ANCHORS_YOLOV4)
)
# postprocess configurations
orienmask_yolo_coco_544_postprocess = dict(
type="OrienMaskYOLOPostProcess",
grid_size=[[10, 10], [20, 20], [40, 40]],
image_size=[320, 320],
anchors=ANCHORS_YOLOV3,
anchor_mask=ANCHORS_MASK,
#num_classes=80,
num_classes=80,
conf_thresh=0.005,
nms=dict(type='batched_nms', threshold=0.5),
nms_pre=400,
nms_post=100,
orien_thresh=0.3
)
orienmask_yolo_coco_544_anchor4_postprocess = construct_config(
orienmask_yolo_coco_544_postprocess,
update=dict(anchors=ANCHORS_YOLOV4)
)
# optimizer configurations
base_sgd = dict(
type="SGD",
#lr=1e-3,
lr=1e-4,
momentum=0.9,
weight_decay=5e-4,
)
# learning rate scheduler configurations
step_lr_warmup_coco_e100 = dict(
type="StepWarmUpLR",
warmup_type="linear",
#warmup_iter=1000,
warmup_iter=200,
warmup_ratio=0.1,
milestones=[520000, 660000],
gamma=0.1
)
# visualizer configurations
coco_visualizer = dict(
type="InferenceVisualizer",
dataset="COCO",
with_mask=True,
conf_thresh=0.3,
alpha=0.6,
line_thickness=1
)
Testing the 100-epoch training result with 320-size images:
ubuntu@ubuntu:~/OrienMask$ CUDA_VISIBLE_DEVICES=0 python3 infer.py -c orienmask_yolo_coco_544_anchor4_fpn_plus_infer -w /home/ubuntu/OrienMask/checkpoints/OrienMaskAnchor4FPNPlus_0910_110504/epoch100.pth -i /home/ubuntu/OrienMask/coco/val2017/JPEGImages/000000078426.jpg -v -o outputs
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 2.17it/s]
The inference takes 0.4614634704589844 seconds.
The average inference time is 461.46 ms (2.17 fps)
Load data: 4.08ms (245.31fps)
Forward & Postprocess: 427.64ms (2.34fps)
Visualize: 28.21ms (35.45fps)