Opencv—python 基于投影的字符分割-CFANZ编程社区

文章目录

一、前言
二、代码

2.1 简单的示例
2.2 示例二：分割验证码代码
2.3 示例三：水平投影获取页面表格区域

三、基于连通域标记的灰度图的前景背景分割

一、前言

字符分割有很多方法，需要根据自己的需求分析哪种方法更适合。大致而言有两种方法：投影分割法和连通域分割法。
投影法的原理：利用二值化图片的像素的分布直方图进行分析，从而找出相邻字符的分界点进行分割。
投影有: 水平投影和垂直投影

二、代码

2.1 简单的示例

import cv2
import numpy as np


def read_img(img):
    """Binarise *img*, erode it at two strengths, and preview the results.

    The image is thresholded at 150 with ``cv2.THRESH_BINARY`` and the binary
    result is eroded twice, independently: once with a 2x2 rectangular kernel
    for 2 iterations and once with a 5x5 rectangular kernel for 5 iterations.
    The input and both eroded images are shown in windows, and the call
    blocks until a key is pressed.

    Note: despite the ``closed_*`` names, the operation applied is
    ``cv2.erode`` -- not a morphological closing.

    Returns:
        The tuple ``(closed_2, closed_5)`` holding the two eroded images.
    """
    binary = cv2.threshold(img, 150, 255, cv2.THRESH_BINARY)[1]

    # (kernel side length, erosion iterations) for each variant, in order.
    eroded = []
    for side, rounds in ((2, 2), (5, 5)):
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (side, side))
        eroded.append(cv2.erode(binary, kernel, iterations=rounds))
    closed_2, closed_5 = eroded

    cv2.imshow('original_img', img)
    for title, picture in (("closed_2", closed_2), ("closed_5", closed_5)):
        cv2.imshow(title, picture)
    cv2.waitKey(0)
    return closed_2, closed_5


def project_img(image):
    """Show the vertical and horizontal projection profiles of a binary image.

    A pixel counts as "black" when its value is exactly 0.  The vertical
    projection is the number of black pixels in each column; the horizontal
    projection is the number of black pixels in each row.  Each profile is
    rendered as a white-on-black bar chart with the same size as *image*
    (column bars grow downwards from the top edge, row bars grow rightwards
    from the left edge).  Both charts are displayed and the call blocks until
    a key is pressed.

    Args:
        image: single-channel (2-D) image array.

    Raises:
        ValueError: if *image* is not 2-D.
    """
    if image.ndim != 2:
        raise ValueError("project_img expects a single-channel 2-D image")

    height, width = image.shape[:2]
    print("image.shape", image.shape)

    # One vectorised pass replaces the per-pixel Python loops.
    black = image == 0
    vertical = np.count_nonzero(black, axis=0).astype(np.int32)    # per column
    horizontal = np.count_nonzero(black, axis=1).astype(np.int32)  # per row

    # Vertical projection chart: in column x the first vertical[x] rows are white.
    emptyImage = np.zeros((height, width, 3), np.uint8)
    emptyImage[np.arange(height)[:, None] < vertical[None, :]] = 255

    # Horizontal projection chart: in row y the first horizontal[y] columns are white.
    emptyImage1 = np.zeros((height, width, 3), np.uint8)
    emptyImage1[np.arange(width)[None, :] < horizontal[:, None]] = 255

    cv2.imshow('chuizhi', emptyImage)
    cv2.imshow('shuipin', emptyImage1)
    cv2.waitKey(0)



if __name__ == '__main__':
    # Flag 0 loads the file as a single-channel grayscale image.
    img = cv2.imread('./123.png', 0)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising, so fail here with a clear message.
        raise FileNotFoundError("cannot read image: ./123.png")
    print("img", img.shape)

    closed_2, closed_5 = read_img(img)
    # Show the projections for both erosion strengths.
    project_img(closed_2)
    project_img(closed_5)
    cv2.destroyAllWindows()

原图：

Opencv—python 基于投影的字符分割_sed

代码运行效果：投影如下（通过调节形态学参数，获取不同的投影效果）

Opencv—python 基于投影的字符分割_连通域_02

import cv2
import numpy
def _find_segments(profile, threshold, limit, min_len=3, pad=2):
    """Split a projection profile into text segments.

    A segment opens at the first index whose count is >= *threshold* and
    closes at the first later index whose count is < *threshold*, provided
    more than *min_len* positions have passed since it opened.  Each segment
    is widened by *pad* on both sides and clamped to ``[0, limit]`` so that it
    can be used directly as a slice without wrapping around or overrunning
    the image.

    Returns:
        A list of ``(begin, end)`` pairs (``end`` exclusive when slicing).
    """
    segments = []
    start = None  # None while scanning blank space
    for pos, count in enumerate(profile):
        if start is None:
            if count >= threshold:      # blank -> text
                start = pos
        elif count < threshold and pos - start > min_len:  # text -> blank
            segments.append((max(start - pad, 0), min(pos + pad, limit)))
            start = None
    return segments


# Load as 3-channel BGR.  cv2.COLOR_BGR2GRAY is a cvtColor conversion code,
# not an imread flag, so it must not be passed to cv2.imread.
img = cv2.imread('D:/0.jpg', cv2.IMREAD_COLOR)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("cannot read image: D:/0.jpg")

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
(_, thresh) = cv2.threshold(gray, 140, 255, cv2.THRESH_BINARY)
# kernel=None selects OpenCV's default 3x3 structuring element.
closed = cv2.erode(thresh, None, iterations=7)
height, width = closed.shape[:2]

# Horizontal projection: number of black (== 0) pixels in every row.
z = numpy.count_nonzero(closed == 0, axis=1)
# Row bands (top, bottom): a row belongs to text when it has >= 150 black pixels.
hfg = _find_segments(z, 150, height)

# For every text line: vertical projection inside the band, then split columns.
for z1, z2 in hfg:
    v = numpy.count_nonzero(closed[z1:z2] == 0, axis=0)
    # A column belongs to a character when it has >= 20 black pixels.
    lfg = _find_segments(v, 20, width)
    for l1, l2 in lfg:
        cv2.rectangle(img, (l1, z1), (l2, z2), (255, 0, 0), 2)

cv2.imshow('result', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

分割效果不佳（有待优化）

import cv2
import numpy as np
def _ink_runs(has_ink, min_span):
    """Return the runs of consecutive truthy entries in *has_ink*.

    Each run is reported as an inclusive ``(first, last)`` index pair and is
    kept only when ``last - first >= min_span``.  A run that reaches the end
    of the sequence is flushed as well.
    """
    runs = []
    first = None  # None while outside a run
    total = len(has_ink)
    for pos in range(total):
        if has_ink[pos]:
            if first is None:
                first = pos
        elif first is not None:
            if pos - 1 - first >= min_span:
                runs.append((first, pos - 1))
            first = None
    if first is not None and total - 1 - first >= min_span:
        runs.append((first, total - 1))
    return runs


# Load as single-channel grayscale.  cv2.COLOR_BGR2GRAY is a cvtColor
# conversion code, not an imread flag.
img = cv2.imread('./0.png', cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("cannot read image: ./0.png")

height, width = img.shape[:2]
data = np.array(img)
min_val = 10      # minimum text-line height in pixels; filters out noise rows
min_val_word = 5  # minimum character width in pixels

# A pixel is "ink" when it is exactly 0, i.e. the image is expected to be
# binarised already (black text on a non-zero background).
ink = data == 0

# Row split: inclusive (top, bottom) indices of every text line.
rowPairs = _ink_runs(ink.any(axis=1), min_val)

# Column split inside each text line; every character is written to its own file.
# NOTE: numbering starts at 0, so the first crop is written to './0.png',
# which is also the input path -- the source file gets overwritten.
number = 0
for start, end in rowPairs:
    line_ink = ink[start:end + 1].any(axis=0)
    for start_j, end_j in _ink_runs(line_ink, min_val_word):
        # +1 because the run bounds are inclusive while slices are exclusive.
        tmp = data[start:end + 1, start_j:end_j + 1]
        cv2.imwrite("./" + '%d.png' % number, tmp)
        number += 1

2.2 示例二：分割验证码代码

例如：图片内容：【axi3】

import cv2
import os
import numpy as np


def get_img(Input_Path):
    """Recursively collect the ``.jpg`` / ``.png`` files below *Input_Path*.

    The directory tree is traversed with ``os.walk``.  Every returned entry is
    the directory path reported by the walk joined to the file name with a
    forward slash.  The suffix match is case-sensitive.

    Returns:
        A list of path strings (empty when nothing matches).
    """
    wanted = (".jpg", ".png")
    return [
        folder + "/" + name
        for folder, _subdirs, names in os.walk(Input_Path)
        for name in names
        if name.endswith(wanted)
    ]



def _split_profile(profile, threshold, limit, pad_before=1, pad_after=2, min_len=3):
    """Split a projection profile into text segments.

    A segment opens at the first index whose count is >= *threshold* and
    closes at the first later index whose count is < *threshold*, provided
    more than *min_len* positions have passed since it opened.  The segment is
    widened by *pad_before* / *pad_after* and clamped to ``[0, limit]`` so it
    is always a valid, non-wrapping slice.

    Returns:
        A list of ``(begin, end)`` pairs (``end`` exclusive when slicing).
    """
    segments = []
    start = None  # None while scanning blank space
    for pos, count in enumerate(profile):
        if start is None:
            if count >= threshold:      # blank -> text
                start = pos
        elif count < threshold and pos - start > min_len:  # text -> blank
            segments.append((max(start - pad_before, 0), min(pos + pad_after, limit)))
            start = None
    return segments


def preject_and_cut(img_path, save_path):
    """Cut a captcha image into single-character crops via projection.

    The image is read as grayscale; pixels equal to 0 are treated as ink.
    The first text band found in the horizontal projection (rows with >= 6
    ink pixels) is split into characters using the vertical projection
    (columns with >= 3 ink pixels).  Only segments wider than 10 pixels are
    kept.  When at least three segments remain, the first three are written
    to *save_path*; a fourth is written only when exactly four remain.

    Output files are named ``<name>_<char>.png`` where ``<name>`` is the first
    four characters of the image file name and ``<char>`` is the character of
    ``<name>`` at the crop's position, i.e. the file name is expected to start
    with the captcha text.

    Args:
        img_path: path of the captcha image.
        save_path: existing directory that receives the crops.

    Returns:
        The grayscale image that was read.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    img_name = os.path.basename(img_path)[:4]
    img = cv2.imread(img_path, 0)
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError("cannot read image: %s" % img_path)
    height, width = img.shape[:2]
    black = img == 0

    # Horizontal projection: ink pixels per row -> text bands.
    horizontal = np.count_nonzero(black, axis=1)
    height_fenge = _split_profile(horizontal, 6, height)
    if not height_fenge:
        return img  # no text band found, nothing to cut

    # Only the first text band is processed.
    y0, y1 = height_fenge[0]

    # Vertical projection inside that band: ink pixels per column -> characters.
    vertical = np.count_nonzero(black[y0:y1], axis=0)
    width_fenge = _split_profile(vertical, 3, width)

    # Drop segments that are too narrow to be a character.
    cut_area = [(x0, x1) for x0, x1 in width_fenge if x1 - x0 > 10]

    if len(cut_area) > 2:
        # Crop from the filtered list so that narrow noise segments cannot
        # shift the characters out of position.
        count = 4 if len(cut_area) == 4 else 3
        for index in range(count):
            x0, x1 = cut_area[index]
            cv2.imwrite(save_path + "/" + img_name + "_" + img_name[index] + ".png",
                        img[y0:y1, x0:x1])

    return img

if __name__ == '__main__':
    input_path = './trainData_denoise'
    save_path = './trainData_denoise_cut'
    # cv2.imwrite does not create missing directories, so make sure the
    # output folder exists before any crop is written.
    os.makedirs(save_path, exist_ok=True)

    img_list = get_img(input_path)
    print("img_list", len(img_list))
    for img_path in img_list:
        print("===========img_path================", img_path)
        img = preject_and_cut(img_path, save_path)
        # Briefly preview each processed image (22 ms per frame).
        cv2.imshow('result', img)
        cv2.waitKey(22)
    cv2.destroyAllWindows()

2.3 示例三：水平投影获取页面表格区域

#%%cython --cplus --a --compile-args=/openmp
import numpy as np
cimport numpy as np
cimport cython
from cython.parallel import parallel, prange
from libcpp.vector cimport vector



@cython.boundscheck(False)
@cython.wraparound(False)
def preject_and_cut(np.ndarray[np.uint8_t, ndim=2] thresh):
    """Return the indices of rows that are mostly black.

    The horizontal projection (number of pixels equal to 0 in every row) is
    computed in parallel.  Rows in ``[10, height - 10)`` whose count exceeds
    70% of the image width are reported.

    Args:
        thresh: C-contiguous 2-D ``uint8`` image.

    Returns:
        A NumPy array with the selected row indices in ascending order.
    """
    cdef Py_ssize_t height = thresh.shape[0]
    cdef Py_ssize_t width = thresh.shape[1]
    cdef Py_ssize_t y, x, i
    cdef Py_ssize_t threshold = <Py_ssize_t>(0.7 * width)

    horizontal = np.zeros(height, dtype=np.uint32)
    cdef unsigned char[:, ::1] raw_view = thresh
    # The view's element type must match the array's dtype exactly;
    # ``unsigned long`` is 64-bit on LP64 platforms and would not match uint32.
    cdef np.uint32_t[::1] hor_view = horizontal
    cdef vector[int] local

    # Rows are independent, so they can be counted in parallel.
    for y in prange(height, nogil=True):
        for x in range(width):
            if raw_view[y, x] == 0:
                hor_view[y] += 1

    # The selection is done sequentially: appending to the shared vector from
    # several threads would be a data race and would duplicate entries.
    for i in range(10, height - 10):
        if <Py_ssize_t>hor_view[i] > threshold:
            local.push_back(<int>i)
    return np.asarray(local)

编译文件：setup.py

from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
import numpy as np



import sys

# The OpenMP switch is compiler specific: MSVC spells it "/openmp", while
# GCC and Clang use "-fopenmp" and need it at link time as well.
# NOTE(review): this assumes MSVC on Windows and an OpenMP-capable GCC/Clang
# elsewhere -- confirm for your toolchain.
if sys.platform.startswith('win'):
    openmp_compile_args = ['/openmp']
    openmp_link_args = []
else:
    openmp_compile_args = ['-fopenmp']
    openmp_link_args = ['-fopenmp']

# Build preject_and_cut.pyx as a C++ extension with OpenMP enabled.
ext_modules = [Extension('preject_and_cut',
                         sources=['preject_and_cut.pyx'],
                         extra_compile_args=openmp_compile_args,
                         extra_link_args=openmp_link_args,
                         language='c++')]

setup(
    name = 'preject_and_cut',
    cmdclass = {'build_ext': build_ext},
    ext_modules = ext_modules,
    # NumPy headers are required because the .pyx file cimports numpy.
    include_dirs=[np.get_include()]
)

测试代码：

import cv2
import os
from preject_and_cut import preject_and_cut

def get_img(Input_Path):
    """Recursively list the ``.tiff`` / ``.png`` files under *Input_Path*.

    Walks the tree with ``os.walk`` and returns, for each matching file, the
    walked directory path and the file name joined by a forward slash.  The
    suffix comparison is case-sensitive.

    Returns:
        A list of path strings (empty when nothing matches).
    """
    img_list = []
    for folder, _subdirs, names in os.walk(Input_Path):
        img_list.extend(
            folder + "/" + name
            for name in names
            if name.endswith((".tiff", ".png"))
        )
    return img_list



if __name__ == '__main__':
    import time
    Input_Path = './table'
    img_list = get_img(Input_Path)
    for input_Path in img_list:
        # perf_counter is a monotonic, high-resolution clock intended for
        # measuring elapsed intervals.
        t0 = time.perf_counter()
        img = cv2.imread(input_Path, 0)  # flag 0: single-channel grayscale
        if img is None:
            # cv2.imread returns None instead of raising for unreadable files.
            print("skip unreadable image", input_Path)
            continue
        _, thresh = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY)
        local = preject_and_cut(thresh)
        t1 = time.perf_counter()
        # Elapsed time covers reading, thresholding and the projection.
        print("time is ", t1 - t0)

三、基于连通域标记的灰度图的前景背景分割

connectedComponentsWithStats(image[, labels[, stats[, centroids[, connectivity[, ltype]]]]]) -> retval, labels, stats, centroids

image：输入图像，必须是二值图
labels：指向被标记的图像
stats：每一个标记的统计信息输出，包括背景。可以通过stats(label, column)（Python 中为 stats[label, column]）查看每一个标记的信息。
centroids：每一个标记的中心位置。
connectivity：可选值为4或8，也就是使用4连通还是8连通。
ltype：输出图像标记的类型，目前支持CV_32S 和 CV_16U。

返回值：

retval：所有标记类型的数目
labels：图像上每一像素的标记，用数字0（背景）、1、2、3…表示
stats：每一个标记的统计信息，是一个5列的矩阵，每一行对应一个连通域外接矩形的x、y、width、height以及该连通域的面积。

示例：
0 0 720 720 291805
92 0 628 720 226595

centroids：连通域的中心点

import cv2
import numpy as np
 

img = cv2.imread("./test.png")
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("cannot read image: ./test.png")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Binarise, then close small gaps with a 2x2 rectangular kernel (2 iterations).
ret, binary = cv2.threshold(gray, 30, 255, cv2.THRESH_BINARY)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
bin_clo = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)

# Label the 8-connected components and collect their statistics.
num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(bin_clo, connectivity=8)

# Uncomment to inspect the return values:
# print('num_labels = ', num_labels)
# print('stats = ', stats)
# print('centroids = ', centroids)
# print('labels = ', labels)

# The last column of stats is the area; pick the label covering the most pixels.
label_area = stats[:, -1]
max_index = np.argmax(label_area)

# Label the background and foreground: the largest component becomes 0,
# every other pixel becomes 255 (vectorised instead of a per-pixel loop).
gray = np.where(labels == max_index, 0, 255).astype(np.uint8)

# Remove small specks with an opening, reusing the same 2x2 kernel.
conne = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel, iterations=2)

cv2.namedWindow('results', cv2.WINDOW_AUTOSIZE)
cv2.imshow('results', conne)
cv2.waitKey(0)
cv2.destroyAllWindows()