基本思想:训练OrienMask模型,然后部署深度相机进行检测和深度信息输出,也正好我的新电脑到了,RTX3060
模型 链接: https://pan.baidu.com/s/1J3G3uQ09p1teapLJ7TpuVg 提取码: cg4h
ubuntu@ubuntu:~$ nvidia-smi
Tue Jul 19 22:58:37 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.57 Driver Version: 515.57 CUDA Version: 11.7 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA GeForce ... Off | 00000000:01:00.0 Off | N/A |
| 0% 38C P8 13W / 170W | 18MiB / 12288MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 785 G /usr/lib/xorg/Xorg 16MiB |
+-----------------------------------------------------------------------------+
第一步:爬虫数据,参考知乎大佬,看附录参考地址,爬纸杯子图片600张
# -*- coding: utf-8 -*-
import requests
import os
import re
def get_images_from_baidu(keyword, page_num, save_dir):
    """Crawl thumbnail images from Baidu image search.

    Args:
        keyword: search term, used for both the 'queryWord' and 'word' params.
        page_num: number of result pages to fetch; each page yields 30 images.
        save_dir: directory the images are written to as 000000.jpg, 000001.jpg, ...
    """
    # UA spoofing: pretend to be a browser so Baidu does not reject the request.
    # Found via (right click -> Inspect) or F12 -> Network -> Ctrl+R -> pick a
    # request and look at its request headers.
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
    # Request URL of the image-search JSON endpoint.
    url = 'https://image.baidu.com/search/acjson?'
    # Fix: create the target directory once, up front (the original re-checked
    # and created it inside the per-page loop).
    os.makedirs(save_dir, exist_ok=True)
    n = 0
    for pn in range(0, 30 * page_num, 30):
        # Query parameters reverse-engineered from the browser request.
        param = {'tn': 'resultjson_com',
                 # 'logid': '7603311155072595725',
                 'ipn': 'rj',
                 'ct': 201326592,
                 'is': '',
                 'fp': 'result',
                 'queryWord': keyword,
                 'cl': 2,
                 'lm': -1,
                 'ie': 'utf-8',
                 'oe': 'utf-8',
                 'adpicid': '',
                 'st': -1,
                 'z': '',
                 'ic': '',
                 'hd': '',
                 'latest': '',
                 'copyright': '',
                 'word': keyword,
                 's': '',
                 'se': '',
                 'tab': '',
                 'width': '',
                 'height': '',
                 'face': 0,
                 'istype': 2,
                 'qc': '',
                 'nc': '1',
                 'fr': '',
                 'expermode': '',
                 'force': '',
                 'cg': '',  # undocumented but required parameter
                 'pn': pn,  # result offset: 30-60-90...
                 'rn': '30',  # 30 results per page
                 'gsm': '1e',
                 '1618827096642': ''
                 }
        # Fix: add a timeout (the original request could hang forever) and
        # actually skip the page on a non-200 response (the original only
        # printed 'Request success.' and then processed the body regardless).
        try:
            request = requests.get(url=url, headers=header, params=param, timeout=10)
        except requests.RequestException as e:
            print('Page request failed:', e)
            continue
        if request.status_code != 200:
            print('Page request returned status', request.status_code)
            continue
        print('Request success.')
        request.encoding = 'utf-8'
        # Extract thumbnail links with a regex over the raw JSON text.
        html = request.text
        image_url_list = re.findall('"thumbURL":"(.*?)",', html, re.S)
        print(image_url_list)
        for image_url in image_url_list:
            # Fix: one failed download no longer aborts the whole crawl.
            try:
                image_data = requests.get(url=image_url, headers=header, timeout=10).content
            except requests.RequestException as e:
                print('Image download failed:', image_url, e)
                continue
            with open(os.path.join(save_dir, f'{n:06d}.jpg'), 'wb') as fp:
                fp.write(image_data)
            n = n + 1
if __name__ == '__main__':
    # Entry point: download roughly pages * 30 "paper cup" thumbnails
    # into <download_root>/<search_term>.
    search_term = '纸杯'
    download_root = "/home/ubuntu/Downloads/"
    target_dir = os.path.join(download_root, search_term)
    pages = 20  # each page holds 30 images -> pages * 30 images total
    get_images_from_baidu(search_term, pages, target_dir)
    print('Get images finished.')
第二步:标注数据集
使用百度飞桨的EISeg软件进行安装和标注,PaddleSeg/contrib/EISeg at release/2.2 · PaddlePaddle/PaddleSeg · GitHub ,我安装的paddle-GPU版本
https://www.paddlepaddle.org.cn/
1、设置仅选json格式且其它图片修正背景方式不用,只用生成原始图片的json能对应上原图片就行,标注完成之后,使用脚本转成labelme进行矫正标注,导入模型,标注还是蛮方便的
这样的杯子,标注一个就行,否则会连体,那样标注就不对了,在标注一个之后,在转labelme进行详细标注,但是对于单个目标就直接标注,还是蛮快的
2、标注完成之后使用eiseg2labelme脚本进行转labelme的json格式进行修正标注
# -*- coding: utf-8 -*-
import os
import json
import io
import glob
import cv2
# Convert EISeg-exported json annotations into labelme-format jsons
# (plus a copy of each source image) under `dest` for manual correction.
dest = "/home/ubuntu/Downloads/dest/"
if not os.path.exists(dest):
    os.makedirs(dest)  # fix: makedirs also creates missing parent dirs (mkdir would fail)
root = '/home/ubuntu/Downloads/cup'
list_dir = glob.glob(root + "/*.json")
for list_item in list_dir:
    filepath, m_filename = os.path.split(list_item)
    m_shortname, extension = os.path.splitext(m_filename)
    m_pic_name = ".".join([m_shortname, "jpg"])
    jpg_path = os.path.join(filepath, m_pic_name)
    # Abort the whole run if an annotation has no matching image.
    if not os.path.exists(jpg_path):
        exit("picture does not exist")
    img = cv2.imread(jpg_path)
    # Copy the image next to the converted annotation.
    cv2.imwrite(os.path.join(dest, m_pic_name), img)
    m_width = img.shape[1]
    m_height = img.shape[0]
    print(m_width, m_height)
    # Fix: the original left the json file handle open (resource leak);
    # read it with a context manager and json.load instead of read()+loads.
    with open(list_item, 'r', encoding='utf-8') as file_json:
        m_data = json.load(file_json)
    # labelme document skeleton.
    data = {
        'imagePath': m_pic_name,
        'flags': {},
        'imageWidth': m_width,
        'imageHeight': m_height,
        'imageData': None,
        'version': "4.5.6",
        'shapes': [],
    }
    for item in m_data:
        m_label = item['name']
        print(m_label)
        itemData = {'points': []}
        for child in item['points']:  # polygon vertices of this instance
            x = float(child[0])
            y = float(child[1])
            itemData['points'].append([x, y])
        print(itemData)
        itemData["flag"] = {}
        itemData["group_id"] = None
        itemData["shape_type"] = "polygon"
        itemData["label"] = m_label
        data["shapes"].append(itemData)
    jsonPath = os.path.join(dest, m_filename)
    with open(jsonPath, "w") as f:
        json.dump(data, f)
    print("加载入文件完成...")
3、生成coco数据集,实例分割转化 进行训练准备 代码来自 https://github.com/zsffuture/labelme/blob/master/examples/instance_segmentation
import argparse
import collections
import datetime
import glob
import json
import os
import os.path as osp
import sys
import uuid
import imgviz
import numpy as np
import labelme
try:
import pycocotools.mask
except ImportError:
print("Please install pycocotools:\n\n pip install pycocotools\n")
sys.exit(1)
def main():
    """Build a COCO-style instance-segmentation dataset from labelme jsons.

    Reads labelme annotation files from --input_dir, copies the images into
    <output_dir>/JPEGImages, optionally renders per-instance visualizations
    into <output_dir>/Visualization, and writes one COCO annotations.json.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("--input_dir", default="/home/ubuntu/Downloads/dest", help="input annotated directory")
    parser.add_argument("--output_dir", default="/home/ubuntu/Downloads/coco", help="output dataset directory")
    parser.add_argument("--labels", default="/home/ubuntu/Downloads/labels.txt", help="labels file")
    parser.add_argument(
        "--noviz", help="no visualization", action="store_true"
    )
    args = parser.parse_args()

    # Fix: the original printed "Output directory already exists" when the
    # directory did NOT exist.  Create all needed directories idempotently;
    # the "Visualization" directory was never created before, so the first
    # imsave() into it would fail.
    os.makedirs(osp.join(args.output_dir, "JPEGImages"), exist_ok=True)
    if not args.noviz:
        os.makedirs(osp.join(args.output_dir, "Visualization"), exist_ok=True)
    print("Creating dataset:", args.output_dir)

    now = datetime.datetime.now()
    # Top-level COCO document skeleton.
    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(url=None, id=0, name=None,)],
        images=[],       # license, url, file_name, height, width, date_captured, id
        type="instances",
        annotations=[],  # segmentation, area, iscrowd, image_id, bbox, category_id, id
        categories=[],   # supercategory, id, name
    )

    # Map class names to contiguous ids; the first line must be "__ignore__"
    # and maps to id -1, which is skipped.
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        class_name_to_id[class_name] = class_id
        data["categories"].append(
            dict(supercategory=None, id=class_id, name=class_name,)
        )

    out_ann_file = osp.join(args.output_dir, "annotations.json")
    label_files = glob.glob(osp.join(args.input_dir, "*.json"))
    for image_id, filename in enumerate(label_files):
        print("Generating dataset from:", filename)
        label_file = labelme.LabelFile(filename=filename)
        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")
        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)
        data["images"].append(
            dict(
                license=0,
                url=None,
                file_name=osp.relpath(out_img_file, osp.dirname(out_ann_file)),
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            )
        )

        masks = {}  # (label, group_id) -> boolean mask, used for area/bbox
        segmentations = collections.defaultdict(list)  # polygon point lists
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")
            mask = labelme.utils.shape_to_mask(
                img.shape[:2], points, shape_type
            )
            # Shapes without a group id are treated as separate instances.
            if group_id is None:
                group_id = uuid.uuid1()
            instance = (label, group_id)
            if instance in masks:
                # Multi-part instance: merge the masks.
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask
            if shape_type == "rectangle":
                # Expand a rectangle into its 4-corner polygon.
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            else:
                points = np.asarray(points).flatten().tolist()
            segmentations[instance].append(points)
        segmentations = dict(segmentations)

        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in class_name_to_id:
                continue  # skip classes absent from the labels file
            cls_id = class_name_to_id[cls_name]
            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()
            data["annotations"].append(
                dict(
                    id=len(data["annotations"]),
                    image_id=image_id,
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                )
            )

        if not args.noviz:
            viz_items = [
                (class_name_to_id[cnm], cnm, msk)
                for (cnm, gid), msk in masks.items()
                if cnm in class_name_to_id
            ]
            # Fix: zip(*[]) raises ValueError for images containing no known
            # classes; skip visualization for those images instead.
            if viz_items:
                viz_labels, captions, viz_masks = zip(*viz_items)
                viz = imgviz.instances2rgb(
                    image=img,
                    labels=viz_labels,
                    masks=viz_masks,
                    captions=captions,
                    font_size=15,
                    line_width=2,
                )
                out_viz_file = osp.join(
                    args.output_dir, "Visualization", base + ".jpg"
                )
                imgviz.io.imsave(out_viz_file, viz)

    with open(out_ann_file, "w") as f:
        json.dump(data, f)


if __name__ == "__main__":
    main()
coco数据集转labelme数据集 实例分割转化
# -*- coding: utf-8 -*-
import glob
import os
import cv2
import json
import io
# COCO class-name table indexed below as label[category_id - 1].  The ""
# entries appear to be placeholders for ids missing from the 80-class subset
# (COCO raw ids run to 91) -- NOTE(review): confirm the placeholder positions
# match the id scheme of the annotation file being converted.
coco=["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
      "fire hydrant","" ,"stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
      "elephant", "bear", "zebra", "giraffe", "","backpack", "umbrella","", "","handbag", "tie", "suitcase", "frisbee",
      "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
      "tennis racket", "bottle","", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
      "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
      "potted plant", "bed","", "dining table", "","","toilet", "","tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
      "microwave", "oven", "toaster", "sink", "refrigerator", "book", "","clock", "vase", "scissors", "teddy bear",
      "hair drier", "toothbrush"]
# Build an index -> name lookup dict from the list above.
label=dict()
for idx,item in enumerate(coco):
    label.update({idx:item})
# Output dir for the generated labelme jsons/images, and the input dirs
# holding the coco-style json and the source jpgs.
labelme_path='/home/ubuntu/Downloads/test/total1'
coco_json_path = '/home/ubuntu/Downloads/test/human_save_2'
jpg_path='/home/ubuntu/Downloads/test/human_save_2'
# Use the first (assumed only) coco-style json found in the directory.
coco_json=glob.glob(os.path.join(coco_json_path,"*.json"))[0]
file_json = io.open(coco_json,'r',encoding='utf-8')
m_json_data = file_json.read()
m_data = json.loads(m_json_data)
#m_type=m_data['type']
for item in m_data['images']:
    # flag: write this image's labelme json only when at least one valid
    # 'person' annotation was collected for it.
    flag=False
    m_images_file_name = item['file_name']
    (filename_path, m_filename) = os.path.split(m_images_file_name)
    (m_name, extension) = os.path.splitext(m_filename)
    m_image=cv2.imread(os.path.join(jpg_path,m_name+".jpg"))
    m_images_height = item['height']
    m_images_width = item['width']
    m_images_id = item['id']
    # labelme document skeleton for this image.
    data = {}
    data['imagePath'] = m_filename
    data['flags'] = {}
    data['imageWidth'] = m_images_width
    data['imageHeight'] = m_images_height
    data['imageData'] = None
    data['version'] = "5.0.1"
    data["shapes"] = []
    for annit in m_data['annotations']:
        m_image_id=annit['image_id']
        m_category_id=annit['category_id']
        # NOTE(review): the lookup uses label[id - 1], i.e. it assumes
        # category ids start at 1 -- verify against the annotation file.
        if m_image_id==m_images_id and label[m_category_id-1]=='person':
            flag = True
            for segitem in annit['segmentation']:
                points = []
                # segmentation is a flat [x0, y0, x1, y1, ...] polygon list.
                for idx in range(0,len(segitem),2):
                    x,y=segitem[idx],segitem[idx+1]
                    # Guard against non-numeric content (e.g. RLE 'counts'
                    # strings instead of polygon coordinates).
                    if str(x).isalpha() or str(y).isalpha():
                        flag = False
                        break
                    points.append([x,y])
                itemData = {'points': []}
                # An empty polygon invalidates the whole image.
                if len(points)==0:
                    flag = False
                    break
                itemData['points'].extend(points)
                itemData["flag"] = {}
                itemData["group_id"] = None
                itemData["shape_type"] = "polygon"
                itemData["label"] =label[m_category_id-1]
                data["shapes"].append(itemData)
    if flag:
        # Persist the labelme json plus a copy of the image alongside it.
        jsonName = ".".join([m_name, "json"])
        jpgName = ".".join([m_name, "jpg"])
        print(labelme_path, jsonName)
        jsonPath = os.path.join(labelme_path, jsonName)
        jpgPath = os.path.join(labelme_path, jpgName)
        with open(jsonPath, "w") as f:
            json.dump(data, f)
        cv2.imwrite(jpgPath,m_image)
        print("加载入文件完成...")
labels.txt的内容为
__ignore__
cup
数据集的目录结构
ubuntu@ubuntu:~/OrienMask/coco$ tree -L 2
.
├── annotations
│ ├── image_info_test-dev2017.json
│ ├── instances_train2017.json
│ ├── instances_val2017.json
│ ├── orienmask_coco_train.json
│ └── orienmask_coco_val.json
├── list
│ ├── coco_train.txt
│ └── coco_val.txt
├── test2017
│ └── JPEGImages
├── train2017
│ └── JPEGImages
└── val2017
└── JPEGImages
8 directories, 7 files
第三步、生成数据集修改配置文件进行训练
1、修改/home/ubuntu/OrienMask/config/config_train.py 修改gpu的数量 我台式机RTX3060 只有一个显卡
#n_gpu=2,
n_gpu=1,
2、/home/ubuntu/OrienMask/config/base.py 全部改成了绝对地址,数据集路径和权重文件 全部改成num_classes=1
# model configurations
orienmask_yolo_coco = dict(
type="OrienMaskYOLO",
num_anchors=3,
num_classes=1,
pretrained="/home/ubuntu/OrienMask/checkpoints/pretrained/pretrained_darknet53.pth",
freeze_backbone=False,
    backbone_batchnorm_eval=True
)
# (注:此处粘贴时内容有缺损,按 OrienMask 原始 config/base.py 结构还原如下)
orienmask_yolo_fpn_plus_coco = replace(
    orienmask_yolo_coco,
    update=dict(type="OrienMaskYOLOFPNPlus")
)
# dataset configurations
coco_train_dataset = dict(
type="COCODataset",
list_file="/home/ubuntu/OrienMask/coco/list/coco_train.txt",
image_dir="/home/ubuntu/OrienMask/coco/train2017",
anno_file="/home/ubuntu/OrienMask/coco/annotations/orienmask_coco_train.json",
with_mask=True,
with_info=False
)
coco_val_dataset = dict(
type="COCODataset",
list_file="/home/ubuntu/OrienMask/coco/list/coco_val.txt",
image_dir="/home/ubuntu/OrienMask/coco/val2017",
anno_file="/home/ubuntu/OrienMask/coco/annotations/orienmask_coco_val.json",
with_mask=True,
with_info=True
)
标签改成data/dataset.py
CLASSES = ['cup']
第四步、开始训练模型
ubuntu@ubuntu:~/OrienMask$ CUDA_VISIBLE_DEVICES=0 python3 train.py -c orienmask_yolo_coco_544_anchor4_fpn_plus
[DarkNet53] Load pretrained model /home/ubuntu/OrienMask/checkpoints/pretrained/pretrained_darknet53.pth
Set checkpoint directory: checkpoints/OrienMaskAnchor4FPNPlus_0719_172036
2022-07-19 22:20:36,624
--------------------------------------------------------------------
2022-07-19 22:20:36,624 [EPOCH 1]
2022-07-19 22:20:36,624 Train on epoch 1
100%|███████████████| 76/76 [00:16<00:00, 4.62it/s, lr=1.45e-04, loss=459.1328]
2022-07-19 22:20:53,107 Finish at 2022-07-19 17:20:53.107502, Runtime: 0:00:16.482666
2022-07-19 22:20:53,108
+----------------+---------+---------+---------+---------+
| TRAIN | S32 | S16 | S08 | ALL |
+----------------+---------+---------+---------+---------+
| loss_xy | 3.520 | 2.068 | 0.183 | 5.772 |
| loss_wh | 84.228 | 5.052 | 0.020 | 89.301 |
| loss_obj | 19.675 | 19.991 | 2.254 | 41.921 |
| loss_noobj | 11.339 | 122.505 | 114.140 | 247.984 |
| loss_cls | 0.659 | 0.365 | 0.031 | 1.054 |
| loss_orien_pos | 9.318 | 8.909 | 0.772 | 18.999 |
| loss_orien_neg | 5.341 | 6.549 | 0.241 | 12.131 |
| loss_sum | 134.079 | 165.441 | 117.640 | 417.161 |
+----------------+---------+---------+---------+---------+
测试模型,效果蛮不错的
train图
val图
工程代码已经提交gitlab
想学TensorRT,把这个框架当个例子,进一步学习写TensorRT代码
参考:
PaddleSeg/contrib/EISeg at release/2.2 · PaddlePaddle/PaddleSeg · GitHub
【Python 爬虫】爬取百度图片 - 知乎