文章目录
- 一、前言
- 二、代码
- 2.1 简单的示例
- 2.2 示例二:分割验证码代码
- 2.3 示例三:水平投影获取页面表格区域
- 三、基于连通域标记的灰度图的前景背景分割
一、前言
- 字符分割有很多方法,需要根据自己的需求来分析哪种方法更适合。大致而言有两种方法:投影分割法和连通域分割法。
- 投影法的原理:利用二值化图片的像素的分布直方图进行分析,从而找出相邻字符的分界点进行分割。
- 投影有: 水平投影 和 垂直投影
二、代码
2.1 简单的示例
import cv2
import numpy as np
def read_img(img):
    """Binarize an image and erode it at two different strengths.

    The image is thresholded at 150 (THRESH_BINARY), then eroded twice:
    once with a 2x2 rectangular element for 2 iterations and once with a
    5x5 rectangular element for 5 iterations.  Note that this is a plain
    erosion, not a morphological closing.  The input and both results are
    shown in windows and the call blocks in ``cv2.waitKey(0)``.

    Args:
        img: image array passed straight to ``cv2.threshold``
            (the caller in this script loads it as grayscale).

    Returns:
        tuple: ``(closed_2, closed_5)`` - the lightly and the heavily
        eroded binary images.
    """
    _, binary = cv2.threshold(img, 150, 255, cv2.THRESH_BINARY)

    def _erode(size, rounds):
        # Square rectangular structuring element of the requested size.
        element = cv2.getStructuringElement(cv2.MORPH_RECT, (size, size))
        return cv2.erode(binary, element, iterations=rounds)

    closed_2 = _erode(2, 2)
    closed_5 = _erode(5, 5)

    windows = (
        ('original_img', img),
        ("closed_2", closed_2),
        ("closed_5", closed_5),
    )
    for title, picture in windows:
        cv2.imshow(title, picture)
    cv2.waitKey(0)
    return closed_2, closed_5
def project_img(image):
    """Show the vertical and horizontal projection profiles of a binary image.

    A projection counts the black pixels (value 0) of every column
    (vertical projection) or every row (horizontal projection).  Each
    profile is rendered as a white-on-black bar chart with the same size
    as the input and shown in a window ('chuizhi' for the vertical plot,
    'shuipin' for the horizontal one).  The call blocks in
    ``cv2.waitKey(0)``.

    Args:
        image: 2-D single-channel array; pixels equal to 0 are counted.

    Raises:
        ValueError: if ``image`` is not 2-D.
    """
    if image.ndim != 2:
        raise ValueError("project_img expects a 2-D single-channel image")

    height, width = image.shape[:2]
    print("image.shape", image.shape)

    # Count black pixels per column / per row in one vectorised pass
    # instead of visiting every pixel from Python.
    black = image == 0
    vertical = black.sum(axis=0).astype(np.int32)
    horizontal = black.sum(axis=1).astype(np.int32)

    # Vertical plot: column x is white in rows [0, vertical[x]).
    emptyImage = np.zeros((height, width, 3), np.uint8)
    row_index = np.arange(height)[:, None]
    emptyImage[row_index < vertical[None, :]] = 255

    # Horizontal plot: row y is white in columns [0, horizontal[y]).
    emptyImage1 = np.zeros((height, width, 3), np.uint8)
    col_index = np.arange(width)[None, :]
    emptyImage1[col_index < horizontal[:, None]] = 255

    cv2.imshow('chuizhi', emptyImage)
    cv2.imshow('shuipin', emptyImage1)
    cv2.waitKey(0)
if __name__ == '__main__':
    # Flag 0 makes imread return a single-channel grayscale image.
    img = cv2.imread('./123.png', 0)
    if img is None:
        # imread reports failure by returning None rather than raising.
        raise OSError("cannot read image './123.png'")
    print("img", img.shape)

    # Compare the projections after a light and after a heavy erosion.
    closed_2, closed_5 = read_img(img)
    project_img(closed_2)
    project_img(closed_5)
    cv2.destroyAllWindows()
原图:
代码运行效果:投影如下(通过调节形态学参数,获取不同的投影效果)
import cv2
import numpy
# Locate characters with projection profiles and draw a box around each one.
#
# Pipeline: binarize -> erode -> horizontal projection to find text lines ->
# vertical projection inside every line to find the characters.

# Load as 3-channel BGR (the imread default).  cv2.COLOR_BGR2GRAY is a
# cvtColor conversion code, not an imread flag, so it must not be passed here.
img = cv2.imread('D:/0.jpg')
if img is None:
    # imread reports failure by returning None rather than raising.
    raise OSError("cannot read image 'D:/0.jpg'")

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
(_, thresh) = cv2.threshold(gray, 140, 255, cv2.THRESH_BINARY)
# No explicit kernel is given, so OpenCV's default structuring element is used.
closed = cv2.erode(thresh, None, iterations=7)
height, width = closed.shape[:2]
black = closed == 0

# Horizontal projection: number of black pixels in every row.
z = black.sum(axis=1)

# Pick the row split points from the horizontal projection.  A text line
# starts at the first row holding >= 150 black pixels and ends at the first
# row, more than 3 rows later, that drops below 150 again.
hfg = []  # [top, bottom) of every detected text line
inline = 1  # 1 while scanning blank rows, 0 while inside a text line
start = 0
for i in range(height):
    if inline == 1 and z[i] >= 150:  # blank -> text
        start = i
        inline = 0
    elif inline == 0 and (i - start > 3) and z[i] < 150:  # text -> blank
        inline = 1
        # Pad by 2 rows but stay inside the image: a negative index would
        # wrap around to the bottom rows, an index >= height would overrun.
        hfg.append([max(start - 2, 0), min(i + 2, height)])

# Vertical projection and split inside every detected text line.
for z1, z2 in hfg:
    # Number of black pixels per column, restricted to this text line.
    v = black[z1:z2].sum(axis=0)

    incol = 1  # 1 while scanning blank columns, 0 while inside a character
    start1 = 0
    for i1 in range(width):
        if incol == 1 and v[i1] >= 20:  # blank -> character
            start1 = i1
            incol = 0
        elif incol == 0 and (i1 - start1 > 3) and v[i1] < 20:  # character -> blank
            incol = 1
            l1 = max(start1 - 2, 0)
            l2 = min(i1 + 2, width)
            # (255, 0, 0) is blue in OpenCV's BGR channel order.
            cv2.rectangle(img, (l1, z1), (l2, z2), (255, 0, 0), 2)

cv2.imshow('result', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
分割效果不佳(有待优化)
import cv2
import numpy as np
# Split an image into text lines and then characters, saving every character
# as "<n>.png".  A row/column counts as blank when none of its pixels is 0.

# Read as grayscale.  cv2.COLOR_BGR2GRAY is a cvtColor conversion code and is
# not a valid imread flag; cv2.IMREAD_GRAYSCALE is the intended one.
img = cv2.imread('./0.png', cv2.IMREAD_GRAYSCALE)
if img is None:
    # imread reports failure by returning None rather than raising.
    raise OSError("cannot read image './0.png'")
height, width = img.shape[:2]
data = np.array(img)

min_val = 10  # minimum text height in pixels, filters out noise
rowPairs = []  # (first_row, last_row) of every text line, both inclusive

# Row split
start_i = -1
end_i = -1
for i in range(height):
    row_blank = data[i].all()  # True when the row holds no 0-valued pixel
    if not row_blank:
        if start_i < 0:
            start_i = i
        else:
            end_i = i
    elif start_i >= 0:
        if end_i - start_i >= min_val:
            rowPairs.append((start_i, end_i))
        start_i, end_i = -1, -1
# NOTE(review): a text line touching the last image row is never closed by a
# blank row and is therefore not emitted; same for the last column below.

# Column split
min_val_word = 5  # minimum character width in pixels
number = 0  # running index used for the output file names
for start, end in rowPairs:
    # `end` is the last text row, so the slice has to include it.
    band = data[start:end + 1]
    # Reset per line so an unfinished run cannot leak into the next line.
    start_j = -1
    end_j = -1
    for j in range(width):
        col_blank = band[:, j].all()
        if not col_blank:
            if start_j < 0:
                start_j = j
            else:
                end_j = j
        elif start_j >= 0:
            if end_j - start_j >= min_val_word:
                tmp = band[:, start_j:end_j + 1]
                # NOTE(review): the first crop is written to "./0.png", the
                # same path the input was read from, so the source image is
                # overwritten - consider a separate output folder.
                cv2.imwrite("./" + '%d.png' % number, tmp)
                number += 1
            start_j, end_j = -1, -1
2.2 示例二:分割验证码代码
例如:图片内容:【axi3】
import cv2
import os
import numpy as np
def get_img(Input_Path):
    """Collect the paths of all .jpg / .png files below a directory.

    The directory tree is walked recursively.  The extension test ignores
    case, so ``A.PNG`` is found as well as ``a.png``.

    Args:
        Input_Path: root directory to search.

    Returns:
        list[str]: sorted paths of the form ``"<directory>/<file name>"``.
        The "/" separator is kept on purpose because the rest of this
        script splits paths on "/".  Empty when nothing matches.
    """
    img_list = []
    for root, _subdirs, filenames in os.walk(Input_Path):
        for filename in filenames:
            if filename.lower().endswith((".jpg", ".png")):
                img_list.append(root + "/" + filename)
    # os.walk yields entries in arbitrary order; sort for reproducible runs.
    img_list.sort()
    return img_list
def _find_runs(profile, threshold, limit):
    """Return padded (begin, end) spans where `profile` reaches `threshold`.

    A run opens at the first position whose value is >= threshold and closes
    at the first position, more than 3 steps later, whose value is below it.
    Each span is padded by 1 before and 2 after, clamped to [0, limit].
    """
    spans = []
    inside = False
    begin = 0
    for pos in range(len(profile)):
        if not inside and profile[pos] >= threshold:
            begin = pos
            inside = True
        elif inside and (pos - begin > 3) and profile[pos] < threshold:
            inside = False
            # Clamp: a negative start would wrap around when used as an
            # index, an end beyond the image would overrun it.
            spans.append((max(begin - 1, 0), min(pos + 2, limit)))
    return spans


def preject_and_cut(img_path, save_path):
    """Cut a captcha image into single characters using projection profiles.

    The horizontal projection (black pixels per row, threshold 6) locates
    the text band; only the first band found is used.  The vertical
    projection inside that band (threshold 3) locates the characters, and
    only segments wider than 10 pixels are kept.  With exactly 4 kept
    segments 4 crops are written, with 3 or with more than 4 the first 3
    are written, with fewer than 3 nothing is written.

    Each crop is saved as ``<save_path>/<name>_<c>.png`` where ``<name>`` is
    the first four characters of the file name and ``<c>`` its k-th
    character (presumably the captcha text - verify against the data set).

    Args:
        img_path: "/"-separated path of the image to cut.
        save_path: existing directory that receives the crops.

    Returns:
        The grayscale image that was loaded from ``img_path``.

    Raises:
        OSError: if the image cannot be read.
    """
    img_name = img_path.split("/")[-1][:4]
    img = cv2.imread(img_path, 0)
    if img is None:
        # imread reports failure by returning None rather than raising.
        raise OSError("cannot read image %r" % (img_path,))
    height, width = img.shape[:2]
    black = img == 0

    # Horizontal projection: black pixels per row -> text bands.
    horizontal = black.sum(axis=1)
    height_fenge = _find_runs(horizontal, 6, height)
    if not height_fenge:
        return img
    y0, y1 = height_fenge[0]

    # Vertical projection inside the first band -> character segments.
    vertical = black[y0:y1].sum(axis=0)
    width_fenge = _find_runs(vertical, 3, width)

    # Drop segments that are too narrow to be a character.
    cut_area = [(x0, x1) for x0, x1 in width_fenge if x1 - x0 > 10]

    if len(cut_area) == 4:
        count = 4
    elif len(cut_area) > 2:
        count = 3
    else:
        count = 0

    # Crop from the filtered segments; indexing the unfiltered list would
    # let narrow noise segments be saved in place of real characters.
    for k in range(count):
        x0, x1 = cut_area[k]
        cv2.imwrite(save_path + "/" + img_name + "_" + img_name[k] + ".png",
                    img[y0:y1, x0:x1])
    return img
if __name__ == '__main__':
    input_path = './trainData_denoise'
    save_path = './trainData_denoise_cut'
    # cv2.imwrite does not create missing folders; without this the crops
    # would be dropped without any error being raised.
    os.makedirs(save_path, exist_ok=True)

    img_list = get_img(input_path)
    print("img_list", len(img_list))
    for img_path in img_list:
        print("===========img_path================", img_path)
        img = preject_and_cut(img_path, save_path)
        cv2.imshow('result', img)
        cv2.waitKey(22)  # show each image for roughly 22 ms
    cv2.destroyAllWindows()
2.3 示例三:水平投影获取页面表格区域
#%%cython --cplus --a --compile-args=/openmp
import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import parallel, prange
from libcpp.vector cimport vector
@cython.boundscheck(False)
@cython.wraparound(False)
def preject_and_cut(np.ndarray[np.uint8_t, ndim=2] thresh):
    """Return the rows of a binary image that are mostly black.

    Counts the 0-valued pixels of every row (rows are processed in parallel
    with prange) and returns the indices of the rows, excluding the first
    and last 10, whose count exceeds 70% of the image width.

    Args:
        thresh: 2-D uint8 array; it must be C-contiguous because it is
            bound to an ``unsigned char[:, ::1]`` memoryview.

    Returns:
        numpy array with the matching row indices (empty if none match).
    """
    cdef Py_ssize_t height = thresh.shape[0]
    cdef Py_ssize_t width = thresh.shape[1]
    # `thread` is the black-pixel count a row has to exceed (70% of width).
    cdef Py_ssize_t y=0, x=0,i=10,thread = <int>(0.7 * width)
    horizontal = np.zeros(height, dtype=np.uint32)
    cdef unsigned char[:,::1] raw_view = thresh
    # The memoryview element type has to match the uint32 buffer exactly;
    # `unsigned long` is 64-bit on many platforms and is then rejected with
    # a buffer dtype mismatch.
    cdef np.uint32_t[::1] hor_view = horizontal
    cdef vector[int] local
    local.reserve(thread)
    with nogil,parallel():
        # Each row index is handled by exactly one iteration, so the
        # per-row counters are never written concurrently.
        for y in prange(height):
            for x in range(width):
                if raw_view[y, x] == 0:
                    hor_view[y] += 1
    for i in range(10, height - 10):
        if hor_view[i] > thread:
            local.push_back(i)
    return np.asarray(local)
编译文件:setup.py
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import numpy as np
# Build script for the Cython extension `preject_and_cut` (C++ with OpenMP).
import sys

# The OpenMP switch depends on the compiler: '/openmp' is MSVC syntax, while
# GCC/Clang expect '-fopenmp' at both compile and link time.
# NOTE(review): assumes MSVC on Windows and a GCC-compatible compiler
# elsewhere - adjust for other toolchains.
if sys.platform.startswith('win'):
    openmp_compile_args = ['/openmp']
    openmp_link_args = []
else:
    openmp_compile_args = ['-fopenmp']
    openmp_link_args = ['-fopenmp']

ext_modules = [Extension('preject_and_cut',
                         sources=['preject_and_cut.pyx'],
                         extra_compile_args=openmp_compile_args,
                         extra_link_args=openmp_link_args,
                         language='c++')]
setup(
    name='preject_and_cut',
    cmdclass={'build_ext': build_ext},
    ext_modules=ext_modules,
    # NumPy headers are needed because the .pyx file cimports numpy.
    include_dirs=[np.get_include()]
)
测试代码:
import cv2
import os
from preject_and_cut import preject_and_cut
def get_img(Input_Path):
    """Collect the paths of all .tiff / .png files below a directory.

    The directory tree is walked recursively.  The extension test ignores
    case, so ``A.PNG`` is found as well as ``a.png``.

    Args:
        Input_Path: root directory to search.

    Returns:
        list[str]: sorted paths of the form ``"<directory>/<file name>"``.
        Empty when nothing matches.
    """
    img_list = []
    for root, _subdirs, filenames in os.walk(Input_Path):
        for filename in filenames:
            if filename.lower().endswith((".tiff", ".png")):
                img_list.append(root + "/" + filename)
    # os.walk yields entries in arbitrary order; sort for reproducible runs.
    img_list.sort()
    return img_list
if __name__ == '__main__':
    import time

    # Time the compiled projection routine on every image below ./table.
    Input_Path = './table'
    img_list = get_img(Input_Path)
    for input_Path in img_list:
        # perf_counter is the clock intended for measuring short intervals.
        t0 = time.perf_counter()
        img = cv2.imread(input_Path, 0)
        if img is None:
            # imread returns None for unreadable files; thresholding None
            # would raise, so skip the file instead.
            print("skip unreadable image", input_Path)
            continue
        _, thresh = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY)
        local = preject_and_cut(thresh)
        t1 = time.perf_counter()
        print("time is ", t1 - t0)
三、基于连通域标记的灰度图的前景背景分割
connectedComponentsWithStats(image[, labels[, stats[, centroids[, connectivity[, ltype]]]]]) -> retval, labels, stats, centroids
image
:输入图像,必须是二值图
labels
:指向被标记的图像
stats
:每一个标记的统计信息输出,包括背景。可以通过 stats[label, column] 查看每一个标记的信息。
centroids
:每一个标记的中心位置。
connectivity
:可选值为4或8,也就是使用4连通还是8连通。
ltype
:输出图像标记的类型,目前支持CV_32S 和 CV_16U。
返回值:
retval
:所有标记类型的数目
labels
:图像上每一像素的标记,用数字0(背景)、1、2、3…表示
stats
:每一个标记的统计信息,是一个5列的矩阵,每一行对应一个连通域外接矩形的x、y、width、height,以及该连通域的面积(像素数)。
- 示例:
0 0 720 720 291805
92 0 628 720 226595
centroids
:连通域的中心点
import cv2
import numpy as np
# Foreground / background separation with connected-component labelling:
# the component with the largest area is treated as background (0) and
# everything else as foreground (255).

img = cv2.imread("./test.png")
if img is None:
    # imread reports failure by returning None rather than raising.
    raise OSError("cannot read image './test.png'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, binary = cv2.threshold(gray, 30, 255, cv2.THRESH_BINARY)

# Closing with a 2x2 rectangular element, applied twice, before labelling.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
bin_clo = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)
num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(bin_clo, connectivity=8)

# Uncomment to inspect the return values:
# print('num_labels = ', num_labels)
# print('stats = ', stats)
# print('centroids = ', centroids)
# print('labels = ', labels)

# The last column of `stats` holds the area of every label.
label_area = stats[:, -1]
max_index = np.argmax(label_area)

# Label background and foreground in one vectorised step instead of a
# per-pixel Python loop.
gray = np.where(labels == max_index, 0, 255).astype(np.uint8)

# Opening with the same 2x2 element, applied twice, on the resulting mask.
conne = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel, iterations=2)
cv2.namedWindow('results', cv2.WINDOW_AUTOSIZE)
cv2.imshow('results', conne)
cv2.waitKey(0)
cv2.destroyAllWindows()