基本思想:去除现有数据集及其 JSON 标注文件中的中文字段,并将 JSON 标注转换为 VOC 格式的 XML 文件
# -*- coding: utf-8 -*-
import xmltodict
import os
import sys
import json
import io
import os
from xml.dom.minidom import Document
# WARNING: back up the original data before running this script, in case
# something goes wrong.

# Placeholder bound to an empty string. NOTE(review): presumably a stand-in for
# JSON ``null``; it is not referenced by the code visible here.
null = ''

# Accumulates shape labels while the JSON files are processed; the distinct
# values are printed once at the end of the script.
TOTAL = []
def file_name(file_dir):
    """Return the paths of every ``.json`` file found under *file_dir*.

    The directory tree is walked recursively with ``os.walk``. Only files
    whose extension is exactly ``.json`` (case-sensitive) are collected.

    Args:
        file_dir: Root directory to search.

    Returns:
        A list of paths (directory joined with file name), in the order
        ``os.walk`` yields them. Empty when nothing matches or when
        *file_dir* does not exist.
    """
    return [
        os.path.join(root, entry)
        for root, _dirs, entries in os.walk(file_dir)
        for entry in entries
        if os.path.splitext(entry)[1] == '.json'
    ]
# Root directory that is searched recursively for the ``.json`` annotations.
path = r'F:\\sxj\\20210813\\sss'

# Folder name written into every XML file: the last component of ``path``.
# The path uses backslashes, so splitting on "/" alone would return the whole
# string; normalise both separator styles before taking the final component.
m_folder = path.replace('\\', '/').rstrip('/').split('/')[-1]
print('m_folder=', m_folder)

# Fixed values copied into each generated annotation.
m_database = 'Unknown'
print('m_database=', m_database)
m_depth = 3
print('m_depth=', m_depth)
m_segmented = 0
print('m_segmented=', m_segmented)
m_pose = 'Unspecified'
print('m_pose=', m_pose)
m_truncated = 0
print('m_truncated=', m_truncated)
m_difficult = 0
print('m_difficult=', m_difficult)
# Directory that receives the renamed images and the generated XML files.
label_dir = 'F:\\sxj\\20210813\\label\\'


def _add_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


# Make sure the output directory exists before anything is moved or written.
os.makedirs(label_dir, exist_ok=True)

path_list = file_name(path)
for m_path in path_list:
    json_dir = os.path.dirname(m_path)
    print('dir=', json_dir)

    # Read the annotation; the context manager closes the handle afterwards.
    with io.open(m_path, 'r', encoding='utf-8') as file_json:
        data = json.loads(file_json.read())

    # Strip the Chinese marker from the image name and move the image into the
    # label directory under the cleaned name (skipped when it already exists).
    m_filename = data['imagePath'].replace("#塔头", "")
    src = os.path.join(os.path.abspath(json_dir), data['imagePath'])
    dst = os.path.join(os.path.abspath(label_dir), m_filename)
    if not os.path.isfile(dst):
        os.rename(src, dst)
    print('m_filename=', m_filename)
    m_path = json_dir + '/' + m_filename
    print('m_path=', m_path)
    m_width = data['imageWidth']
    print('m_width=', m_width)
    m_height = data['imageHeight']
    print('m_height=', m_height)
    object_name = os.path.splitext(m_filename)[0]
    new_object_name = object_name + '.xml'
    print(new_object_name)

    # Build the annotation document.
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    # The element is named 'folder' (previously misspelled 'floder'), which is
    # the tag the later XML-rewriting scripts look up.
    _add_text_element(doc, annotation, 'folder', m_folder)
    _add_text_element(doc, annotation, 'filename', m_filename)
    _add_text_element(doc, annotation, 'path', m_path)

    source = doc.createElement('source')
    annotation.appendChild(source)
    _add_text_element(doc, source, 'database', m_database)

    size = doc.createElement('size')
    annotation.appendChild(size)
    _add_text_element(doc, size, 'width', str(m_width))
    _add_text_element(doc, size, 'height', str(m_height))
    _add_text_element(doc, size, 'depth', str(m_depth))

    _add_text_element(doc, annotation, 'segmented', str(m_segmented))

    for shape in data['shapes']:
        # Only the first two points are used, treated as opposite corners.
        # NOTE(review): assumes every shape is a two-point rectangle - confirm.
        first, second = shape['points'][0], shape['points'][1]
        m_xmin_0 = min(first[0], second[0])
        print('m_xmin_0=', m_xmin_0)
        m_ymin_0 = min(first[1], second[1])
        print('m_ymin_0=', m_ymin_0)
        m_xmax_0 = max(first[0], second[0])
        print('m_xmax_0=', m_xmax_0)
        m_ymax_0 = max(first[1], second[1])
        print('m_ymax_0=', m_ymax_0)

        m_name_0 = shape['label']
        if m_name_0 == "person" or m_name_0 == "phone":
            print()
        else:
            # Make unexpected labels easy to spot in the console output.
            print("#############################################################", m_name_0)
        # NOTE(review): the original indentation was lost; every label is
        # recorded here so the final summary lists all classes - confirm.
        TOTAL.append(m_name_0)

        obj_node = doc.createElement('object')
        _add_text_element(doc, obj_node, 'name', m_name_0)
        _add_text_element(doc, obj_node, 'pose', m_pose)
        _add_text_element(doc, obj_node, 'truncated', str(m_truncated))
        # m_difficult is configured alongside pose/truncated but was never
        # written; emit it so the object entry carries all three flags.
        _add_text_element(doc, obj_node, 'difficult', str(m_difficult))

        # The box is shrunk by 2 on every side, as in the original script.
        bndbox = doc.createElement('bndbox')
        _add_text_element(doc, bndbox, 'xmin', str(m_xmin_0 + 2))
        _add_text_element(doc, bndbox, 'ymin', str(m_ymin_0 + 2))
        _add_text_element(doc, bndbox, 'xmax', str(m_xmax_0 - 2))
        _add_text_element(doc, bndbox, 'ymax', str(m_ymax_0 - 2))
        obj_node.appendChild(bndbox)
        annotation.appendChild(obj_node)

    new_path_filename = label_dir + new_object_name.replace("#塔头", "")
    print('new_path_filename=', new_path_filename)
    # Open the file as UTF-8 so the bytes match the encoding declared in the
    # XML header; the platform default may differ and corrupt non-ASCII labels.
    with io.open(new_path_filename, 'w', encoding='utf-8') as f:
        doc.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')

# Summary of every distinct label recorded during the run.
print(set(TOTAL))
修改 labelImg 标注的 XML 文件的部分内容:涉及 folder、filename、path 三个字段,并将中文类别名替换为英文
import xml.etree.ElementTree as ET
import os
import cv2
from xml.etree.ElementTree import parse, Element
path = r'F:\data\data\total'
sv_path = r"F:\data\data\A"  # directory that receives the rewritten XML files

# Chinese class names and the English names that replace them. Labels that are
# not listed here are left untouched.
LABEL_TRANSLATIONS = {
    "佩戴安全帽": "helmet",
    "穿戴工作服": "coverall",
    "穿戴安全带": "seatbelt",
    "没戴安全帽": "no_helmet",
    "没穿工作服": "no_coverall",
    "没穿安全带": "no_seatbelt",
}

# Distinct class names seen after translation, in first-seen order.
list_d = []


def _set_child_text(parent, tag, text):
    """Set the text of child *tag* of *parent*, creating the child if absent."""
    child = parent.find(tag)
    if child is None:
        # A missing element would otherwise raise AttributeError on ``.text``.
        child = Element(tag)
        parent.append(child)
    child.text = text


# Writing fails when the destination directory is missing, so create it first.
os.makedirs(sv_path, exist_ok=True)

files = os.listdir(path)  # every entry name in the source directory
for File in files:
    if not File.endswith('.xml'):
        continue

    filename, extension = os.path.splitext(File)
    img_name = ".".join([filename, "jpg"])
    xml_path = os.path.join(path, File)
    dom = parse(xml_path)
    root = dom.getroot()
    print(root)
    path1 = os.path.join(sv_path, File)

    for obj in root.iter('annotation'):
        # Point the bookkeeping fields at the image that shares the XML's stem.
        _set_child_text(obj, 'filename', img_name)
        _set_child_text(obj, 'path', img_name)
        _set_child_text(obj, 'folder', "sxj731533730")

        for item in obj.iter("object"):
            name_node = item.find("name")
            if name_node is None:
                # Nothing to translate or record for an unnamed object.
                continue
            name4 = name_node.text
            if name4 in LABEL_TRANSLATIONS:
                print(name4)
                name_node.text = LABEL_TRANSLATIONS[name4]
            if name_node.text not in list_d:
                list_d.append(name_node.text)

    dom.write(path1, xml_declaration=True)  # save to the output directory

print(list_d)
重命名 XML 文件和图片,并将文件名中的第一个空格替换为下划线
import os
import cv2
from xml.etree.ElementTree import parse, Element
path = r'F:\te\test_detector_xml_format\test_detector_xml_format\xml_format\total_Dataset'
sv_path = r"F:\te\test_detector_xml_format\test_detector_xml_format\xml_format\xml"  # output directory for the rewritten XML files
img_path = r"F:\te\test_detector_xml_format\test_detector_xml_format\xml_format\images"  # output directory for the renamed images

# Distinct class names found across all processed XML files.
set_d = set()


def _set_child_text(parent, tag, text):
    """Set the text of child *tag* of *parent*, creating the child if absent."""
    child = parent.find(tag)
    if child is None:
        # A missing element would otherwise raise AttributeError on ``.text``.
        child = Element(tag)
        parent.append(child)
    child.text = text


# Both destinations must exist before anything can be written into them.
os.makedirs(sv_path, exist_ok=True)
os.makedirs(img_path, exist_ok=True)

files = os.listdir(path)  # every entry name in the source directory
for File in files:
    filename, extension = os.path.splitext(File)
    # Only the FIRST space is replaced, exactly as the original script did.
    # NOTE(review): names with several spaces keep the later ones - confirm
    # whether every space should be replaced.
    new_filename = filename.replace(" ", "_", 1)

    if File.endswith('.xml'):
        img_name = ".".join([new_filename, "jpg"])
        xml_path = os.path.join(path, File)
        dom = parse(xml_path)
        root = dom.getroot()
        print(root)
        path1 = os.path.join(sv_path, new_filename + ".xml")

        for obj in root.iter('annotation'):
            # Keep the bookkeeping fields in sync with the renamed image.
            _set_child_text(obj, 'filename', img_name)
            _set_child_text(obj, 'path', img_name)
            _set_child_text(obj, 'folder', "sxj731533730")

            for item in obj.iter("object"):
                name_node = item.find("name")
                if name_node is None:
                    # Nothing to record for an unnamed object.
                    continue
                set_d.add(name_node.text)

        dom.write(path1, xml_declaration=True)  # save to the output directory

    elif File.endswith('.jpg'):
        img_src = os.path.join(path, File)
        img = cv2.imread(img_src)
        if img is None:
            # imread returns None when the file cannot be read or decoded;
            # handing None to imwrite would abort the whole run.
            print('WARNING: could not read image, skipped:', img_src)
            continue
        img_dest = os.path.join(img_path, new_filename + ".jpg")
        cv2.imwrite(img_dest, img)

print(set_d)