0
点赞
收藏
分享

微信扫一扫

Opencv—python 基于投影的字符分割

宁静的猫 2022-08-09 阅读 84


文章目录

  • ​​一、前言​​
  • ​​二、代码​​
  • ​​2.1 简单的示例​​
  • ​​2.2 示例二:分割验证码代码​​
  • ​​2.3 示例三:水平投影获取页面表格区域​​
  • ​​三、基于连通域标记的灰度图的前景背景分割​​

一、前言

  1. 字符分割有很多方法,需要根据自己的需求来分析哪种方法更适合自己。大致而言有两种方法:投影分割法和连通域分割法
  2. 投影法的原理:利用二值化图片的像素的分布直方图进行分析,从而找出相邻字符的分界点进行分割。
  3. 投影有: 水平投影 和 垂直投影

二、代码

2.1 简单的示例

import cv2
import numpy as np


def read_img(img):
    """Binarize an image and erode it at two strengths, displaying each result.

    Args:
        img: Image array handed to ``cv2.threshold`` (the script's entry
            point loads it with ``cv2.imread(path, 0)``).

    Returns:
        A ``(closed_2, closed_5)`` tuple: the thresholded image eroded with a
        2x2 rectangular kernel for 2 iterations, and with a 5x5 rectangular
        kernel for 5 iterations.

    Note:
        Opens three HighGUI windows and blocks in ``cv2.waitKey(0)`` until a
        key is pressed.
    """
    # Pixels above 150 become 255, all others become 0.
    _, thresh = cv2.threshold(img, 150, 255, cv2.THRESH_BINARY)

    # NOTE: despite the ``closed_*`` names, both results are plain erosions,
    # not morphological closings. Erosion keeps the local minimum, so the
    # 0-valued (dark) regions grow.
    kernel_2 = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    closed_2 = cv2.erode(thresh, kernel_2, iterations=2)

    kernel_5 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    closed_5 = cv2.erode(thresh, kernel_5, iterations=5)

    cv2.imshow('original_img', img)
    cv2.imshow("closed_2", closed_2)
    cv2.imshow("closed_5", closed_5)
    cv2.waitKey(0)
    return closed_2, closed_5


def project_img(image):
    """Display the vertical and horizontal projections of a binary image.

    A pixel contributes to a projection when its value is exactly 0. The
    vertical projection counts such pixels per column, the horizontal
    projection counts them per row. Each projection is drawn as a white
    histogram on a black canvas of the same size as ``image`` and shown in
    its own window ('chuizhi' = vertical, 'shuipin' = horizontal).

    Args:
        image: 2-D (single-channel) image array.

    Raises:
        ValueError: if ``image`` is not 2-D.

    Note:
        Blocks in ``cv2.waitKey(0)`` until a key is pressed.
    """
    if image.ndim != 2:
        raise ValueError("project_img expects a single-channel (2-D) image")

    height, width = image.shape[:2]
    print("image.shape",image.shape)

    # One boolean mask serves both projections; counting along an axis
    # replaces the per-pixel Python loops.
    is_black = image == 0
    vertical = np.count_nonzero(is_black, axis=0).astype(np.int32)    # per column
    horizontal = np.count_nonzero(is_black, axis=1).astype(np.int32)  # per row

    # Vertical projection plot: column x is white from row 0 down to
    # row vertical[x] - 1.
    row_idx = np.arange(height)[:, None]
    vertical_plot = np.zeros((height, width, 3), np.uint8)
    vertical_plot[row_idx < vertical[None, :]] = 255

    # Horizontal projection plot: row y is white from column 0 up to
    # column horizontal[y] - 1.
    col_idx = np.arange(width)[None, :]
    horizontal_plot = np.zeros((height, width, 3), np.uint8)
    horizontal_plot[col_idx < horizontal[:, None]] = 255

    cv2.imshow('chuizhi', vertical_plot)
    cv2.imshow('shuipin', horizontal_plot)
    cv2.waitKey(0)



if __name__ == '__main__':
    # Flag 0 asks imread for a single-channel grayscale image.
    img = cv2.imread('./123.png', 0)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None
        # rather than raising, so fail here with a clear message.
        raise SystemExit("cannot read image './123.png'")
    print("img",img.shape)

    closed_2, closed_5 = read_img(img)
    # Show the projections for both erosion strengths.
    project_img(closed_2)
    project_img(closed_5)
    cv2.destroyAllWindows()

原图:

Opencv—python 基于投影的字符分割_sed

代码运行效果:投影如下(通过调节形态学参数,获取不同的投影效果)

Opencv—python 基于投影的字符分割_连通域_02

import cv2
import numpy
# Load as a 3-channel BGR image so the cvtColor call below is valid.
# cv2.COLOR_BGR2GRAY is a cvtColor conversion code, not an imread flag, so it
# must not be passed to imread.
img = cv2.imread('D:/0.jpg', cv2.IMREAD_COLOR)
if img is None:
    # imread returns None instead of raising when the file cannot be read.
    raise SystemExit("cannot read image 'D:/0.jpg'")

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 140, 255, cv2.THRESH_BINARY)
# kernel=None selects OpenCV's default 3x3 rectangular structuring element.
closed = cv2.erode(thresh, None, iterations=7)
height, width = closed.shape[:2]

# Horizontal projection: number of zero-valued pixels in every row.
z = numpy.count_nonzero(closed == 0, axis=1)

# Choose the row split points from the horizontal projection.
# A text row starts where the count reaches 150 and ends at the first row,
# more than 3 rows later, where it drops below 150.
hfg = []          # (top, bottom) of every detected text row, padded by 2
inline = 1        # 1 = currently in a blank area, 0 = inside a text row
start = 0
for i in range(height):
    if inline == 1 and z[i] >= 150:        # blank area -> text area
        start = i
        inline = 0
    elif inline == 0 and z[i] < 150 and i - start > 3:   # text -> blank
        inline = 1
        # Clamp the padded bounds: a negative top would wrap around to the
        # bottom of the image and a bottom past `height` would index out of
        # range.
        hfg.append((max(start - 2, 0), min(i + 2, height)))

# Vertical projection and column split, independently for every text row.
for z1, z2 in hfg:
    # Number of zero-valued pixels in every column of this row band.
    v = numpy.count_nonzero(closed[z1:z2, :] == 0, axis=0)

    incol = 1     # 1 = currently in a blank area, 0 = inside a character
    start1 = 0
    for i1 in range(width):
        if incol == 1 and v[i1] >= 20:     # blank area -> character
            start1 = i1
            incol = 0
        elif incol == 0 and v[i1] < 20 and i1 - start1 > 3:   # character -> blank
            incol = 1
            l1 = start1 - 2                # padded left edge
            l2 = i1 + 2                    # padded right edge
            cv2.rectangle(img, (l1, z1), (l2, z2), (255, 0, 0), 2)

cv2.imshow('result', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

分割效果不佳(有待优化)

import cv2
import numpy as np
def _ink_runs(has_ink, min_span):
    """Return inclusive (first, last) index pairs of consecutive truthy runs.

    A run is kept only when ``last - first >= min_span``. A run that reaches
    the end of ``has_ink`` is emitted as well.
    """
    runs = []
    first = -1
    last = -1
    for idx, ink in enumerate(has_ink):
        if ink:
            if first < 0:
                first = idx
            last = idx
        elif first >= 0:
            if last - first >= min_span:
                runs.append((first, last))
            first = -1
    if first >= 0 and last - first >= min_span:
        runs.append((first, last))
    return runs


# cv2.COLOR_BGR2GRAY is a cvtColor conversion code, not an imread flag;
# IMREAD_GRAYSCALE is the flag that yields a single-channel image.
img = cv2.imread('./0.png', cv2.IMREAD_GRAYSCALE)
if img is None:
    # imread returns None instead of raising when the file cannot be read.
    raise SystemExit("cannot read image './0.png'")

height, width = img.shape[:2]
data = np.array(img)
min_val = 10       # minimum text height in pixels, so noise is not cut out
min_val_word = 5   # minimum character width in pixels
number = 0         # running index used in the output file names

# Row split: a row contains ink when at least one of its pixels is 0.
rowPairs = _ink_runs(~data.all(axis=1), min_val)   # inclusive (top, bottom)

# Column split inside every text row.
for start, end in rowPairs:
    # `end` is the last ink row, so the slice needs end + 1 to include it.
    band = data[start:end + 1]
    for start_j, end_j in _ink_runs(~band.all(axis=0), min_val_word):
        tmp = band[:, start_j:end_j + 1]
        # NOTE(review): the first crop is written to './0.png', the same path
        # the input was read from -- confirm that overwriting it is intended.
        cv2.imwrite("./" + '%d.png' % number, tmp)
        number += 1

2.2 示例二:分割验证码代码

例如:图片内容:【axi3】

import cv2
import os
import numpy as np


def get_img(Input_Path):
    """Recursively collect the paths of ``.jpg`` and ``.png`` files.

    Args:
        Input_Path: Root directory to walk.

    Returns:
        A list of paths, each built as ``<directory> + "/" + <filename>``, in
        ``os.walk`` order. The suffix match is case-sensitive. The list is
        empty when nothing matches or the directory does not exist.
    """
    # The "/" separator is kept on purpose: preject_and_cut recovers the file
    # name with img_path.split("/").
    return [
        root + "/" + filename
        for root, _subdirs, filenames in os.walk(Input_Path)
        for filename in filenames
        if filename.endswith((".jpg", ".png"))
    ]



def _find_segments(projection, threshold, pad_before, pad_after):
    """Return padded (begin, end) pairs for runs where projection >= threshold.

    A run starts at the first index whose count reaches ``threshold`` and ends
    at the first index, more than 3 positions later, whose count is below it.
    The returned bounds are ``start - pad_before`` and ``end + pad_after`` and
    are NOT clamped to the valid index range.
    """
    segments = []
    in_blank = True
    start = 0
    for i, count in enumerate(projection):
        if in_blank and count >= threshold:          # blank -> text
            start = i
            in_blank = False
        elif not in_blank and count < threshold and i - start > 3:   # text -> blank
            in_blank = True
            segments.append((start - pad_before, i + pad_after))
    return segments


def preject_and_cut(img_path,save_path):
    """Cut a captcha image into single-character images using projections.

    The first four characters of the file name are taken as the captcha text.
    Rows are found from the horizontal projection (threshold 6), then columns
    are found inside the first text row from the vertical projection
    (threshold 3). Column segments wider than 10 pixels are kept. When at
    least three are kept, the first three are written to
    ``<save_path>/<name>_<char>.png``; a fourth is written only when exactly
    four were kept.

    Args:
        img_path: Path of the captcha image; "/" must be the path separator.
        save_path: Directory for the character crops (created if missing).

    Returns:
        The grayscale image that was read.

    Raises:
        OSError: if the image cannot be read.
        IndexError: if the file name is shorter than the number of crops.
    """
    img_name = img_path.split("/")[-1][:4]
    img = cv2.imread(img_path, 0)
    if img is None:
        # cv2.imread returns None instead of raising on failure.
        raise OSError("cannot read image %r" % (img_path,))
    height, width = img.shape[:2]
    is_black = img == 0

    # Horizontal projection: number of black pixels in every row.
    horizontal = np.count_nonzero(is_black, axis=1)
    rows = _find_segments(horizontal, 6, 1, 2)
    if not rows:
        return img

    # Only the first text row is used. Clamp the padded bounds so a -1 top
    # cannot wrap to the last row and the bottom cannot pass the image.
    y0 = max(rows[0][0], 0)
    y1 = min(rows[0][1], height)

    # Vertical projection restricted to that row band.
    vertical = np.count_nonzero(is_black[y0:y1], axis=0)

    # Keep only segments wide enough to be a character.
    cut_area = [
        (left, right)
        for left, right in _find_segments(vertical, 3, 1, 2)
        if right - left > 10
    ]

    if len(cut_area) > 2:
        # cv2.imwrite does not create missing directories.
        os.makedirs(save_path, exist_ok=True)
        count = 4 if len(cut_area) == 4 else 3
        for k in range(count):
            # Crop from the width-filtered segments (not the raw list) so a
            # narrow noise segment cannot shift the characters.
            x0 = max(cut_area[k][0], 0)
            x1 = min(cut_area[k][1], width)
            cv2.imwrite(save_path + "/" + img_name+"_"+img_name[k] + ".png", img[y0:y1, x0:x1])

    return img

if __name__ == '__main__':
    input_path = './trainData_denoise'
    save_path= './trainData_denoise_cut'

    img_list = get_img(input_path)
    print("img_list",len(img_list))

    for img_path in img_list:
        print("===========img_path================",img_path)
        img = preject_and_cut(img_path,save_path)
        # Show each processed image for 22 ms before moving on.
        cv2.imshow('result', img)
        cv2.waitKey(22)
    cv2.destroyAllWindows()

2.3 示例三:水平投影获取页面表格区域

#%%cython --cplus --a --compile-args=/openmp
import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import parallel, prange
from libcpp.vector cimport vector



@cython.boundscheck(False)
@cython.wraparound(False)
def preject_and_cut(np.ndarray[np.uint8_t, ndim=2] thresh):
    """Return the rows of a binary image that are mostly black.

    Counts the zero-valued pixels of every row in parallel, then collects the
    indices ``i`` in ``[10, height - 10)`` whose count exceeds ``0.7 * width``.

    Args:
        thresh: 2-D uint8 image.

    Returns:
        A NumPy array with the selected row indices, in increasing order.
    """
    cdef Py_ssize_t height = thresh.shape[0]
    cdef Py_ssize_t width = thresh.shape[1]
    cdef Py_ssize_t y = 0, x = 0, i = 10
    # `thread` is the count threshold (70% of the width), not a thread count.
    cdef Py_ssize_t thread = <Py_ssize_t>(0.7 * width)

    horizontal = np.zeros(height, dtype=np.uint32)
    # The [:, ::1] view needs C-contiguous memory; copy only when required.
    cdef unsigned char[:, ::1] raw_view = np.ascontiguousarray(thresh)
    # Type the view with the array's own dtype. `unsigned long` is 32-bit
    # under MSVC but 64-bit on LP64 platforms, where it would not match the
    # uint32 buffer.
    cdef np.uint32_t[::1] hor_view = horizontal
    cdef vector[int] local
    local.reserve(thread)

    # Each prange iteration writes only hor_view[y], so rows are independent.
    with nogil, parallel():
        for y in prange(height):
            for x in range(width):
                if raw_view[y, x] == 0:
                    hor_view[y] += 1

    # Serial scan, outside the parallel region: push_back on a shared vector
    # is not safe to run from several threads.
    for i in range(10, height - 10):
        if hor_view[i] > thread:
            local.push_back(i)
    return np.asarray(local)

编译文件:setup.py

from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import numpy as np



import sys

# OpenMP is enabled with a compiler-specific switch: MSVC takes /openmp,
# GCC-style toolchains take -fopenmp at both compile and link time.
# NOTE(review): assumes MSVC on Windows and a GCC-compatible compiler
# elsewhere -- adjust for other toolchains.
if sys.platform.startswith('win'):
    openmp_compile_args = ['/openmp']
    openmp_link_args = []
else:
    openmp_compile_args = ['-fopenmp']
    openmp_link_args = ['-fopenmp']

ext_modules = [Extension('preject_and_cut',
                         sources=['preject_and_cut.pyx'],
                         extra_compile_args=openmp_compile_args,
                         extra_link_args=openmp_link_args,
                         language='c++')]

setup(
    name = 'preject_and_cut',
    cmdclass = {'build_ext': build_ext},
    ext_modules = ext_modules,
    # The .pyx file cimports numpy, so its headers must be on the include path.
    include_dirs=[np.get_include()]
)

测试代码:

import cv2
import os
from preject_and_cut import preject_and_cut

def get_img(Input_Path):
    """Recursively collect the paths of ``.tiff`` and ``.png`` files.

    Args:
        Input_Path: Root directory to walk.

    Returns:
        A list of paths, each built as ``<directory> + "/" + <filename>``, in
        ``os.walk`` order. The suffix match is case-sensitive. The list is
        empty when nothing matches or the directory does not exist.
    """
    return [
        root + "/" + filename
        for root, _subdirs, filenames in os.walk(Input_Path)
        for filename in filenames
        if filename.endswith((".tiff", ".png"))
    ]



if __name__ == '__main__':
    import time

    Input_Path = './table'
    img_list = get_img(Input_Path)
    for input_Path in img_list:
        # The timing covers reading, thresholding and the projection itself.
        t0 = time.time()
        img = cv2.imread(input_Path, 0)
        if img is None:
            # cv2.imread returns None for unreadable files; skip them instead
            # of failing inside cv2.threshold.
            print("skip unreadable image", input_Path)
            continue
        _, thresh = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY)
        local = preject_and_cut(thresh)
        t1 = time.time()
        print("time is ",t1-t0)

三、基于连通域标记的灰度图的前景背景分割

connectedComponentsWithStats(image[, labels[, stats[, centroids[, connectivity[, ltype]]]]]) -> retval, labels, stats, centroids


​image​​:输入图像,必须是二值图
​​​labels​​​:指向被标记的图像
​​​stats​​​:每一个标记的统计信息输出,包括背景。可以通过stas(label, column)查看每一个标记的信息。
​​​centroids​​​:每一个标记的中心位置。
​​​connectivity​​​:可选值为4或8,也就是使用4连通还是8连通。
​​​ltype​​:输出图像标记的类型,目前支持CV_32S 和 CV_16U。


返回值:

​retval​​​:所有标记类型的数目
​​​labels​​​:图像上每一像素的标记,用数字1、2、3…表示
​​​stats​​:每一个标记的统计信息,是一个5列的矩阵,每一行对应各个轮廓的x、y、width、height和面积,

  • 示例:
    0 0 720 720 291805
    92 0 628 720 226595

​centroids​​:连通域的中心点

import cv2
import numpy as np


img = cv2.imread("./test.png")
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise SystemExit("cannot read image './test.png'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Binarize, then close small gaps with a 2x2 rectangular kernel.
ret, binary = cv2.threshold(gray, 30, 255, cv2.THRESH_BINARY)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
bin_clo = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)

num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(bin_clo, connectivity=8)

# To inspect the returned values, print num_labels, stats, centroids and
# labels here.

# The last column of `stats` is the area of each label; pick the largest one.
# NOTE(review): the background label is part of `stats` too, so the largest
# label may be the background -- confirm that is the intended choice.
label_area = stats[:, -1]
max_index = np.argmax(label_area)

# Label background and foreground: pixels of the largest component become 0,
# everything else becomes 255. One masked assignment replaces the per-pixel
# loop; `gray` is reused as the output buffer.
gray[:] = np.where(labels == max_index, 0, 255)

# Remove small specks with an opening, using the same 2x2 kernel.
conne = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel, iterations=2)

cv2.namedWindow('results', cv2.WINDOW_AUTOSIZE)
cv2.imshow('results', conne)
cv2.waitKey(0)
cv2.destroyAllWindows()


举报

相关推荐

0 条评论