环境:Win10 + Python 3.9 + opencv-python
将OCR文本行图片放到一个固定尺寸的画布上:
先按比例缩放到高度32,再在右侧填充到宽度640,得到32x640的图片:
Python脚本:
import os
import random
import cv2
import glob
import pathlib
import numpy as np
# Batch-resize OCR text-line images onto a fixed-size canvas.
#
# Every *.jpg directly inside ``data_path`` is scaled to ``resize_height``
# pixels high (keeping its aspect ratio), placed at the left edge of a
# ``resize_height`` x ``resize_width`` 3-channel canvas, padded on the right
# with a random gray level, and written to ``save_path`` under the same stem.
data_path = r'E:\datasets\gen_number_str_hw\train_enhance'
save_path = r'E:\datasets\gen_number_str_hw\train_enhance_resize'
resize_height = 32  # canvas height in pixels
resize_width = 640  # canvas width in pixels


def scaled_width(height, width, target_height, max_width):
    """Return the aspect-preserving width at ``target_height``, clamped.

    The result is limited to ``[1, max_width]``: an image that would be wider
    than the canvas is squeezed to fit instead of producing a negative
    padding width, and a very tall image never yields a zero width (which
    ``cv2.resize`` rejects).
    """
    natural = int(width * target_height / height)
    return max(1, min(natural, max_width))


def to_three_channels(img):
    """Return ``img`` as an (H, W, 3) array.

    ``cv2.IMREAD_UNCHANGED`` yields a 2-D array for grayscale files; those
    are replicated across three channels. A fourth channel, if present, is
    dropped. Three-channel input is returned unchanged.
    """
    if img.ndim == 2:
        img = img[:, :, np.newaxis]
    channels = img.shape[2]
    if channels == 1:
        return np.repeat(img, 3, axis=2)
    if channels == 4:
        return np.ascontiguousarray(img[:, :, :3])
    return img


def pad_right(img, target_width, fill_value):
    """Place ``img`` at the left of a ``target_width``-wide canvas.

    The canvas has the same height as ``img``, three uint8 channels, and is
    pre-filled with ``fill_value``. Columns of ``img`` beyond
    ``target_width`` are cropped.
    """
    height = img.shape[0]
    canvas = np.full((height, target_width, 3), fill_value, np.uint8)
    visible = img[:, :target_width]
    canvas[:, :visible.shape[1]] = visible
    return canvas


os.makedirs(save_path, exist_ok=True)

best_width = 0  # widest aspect-preserving width seen so far
width_limit_cnt = 0  # number of images wider than the canvas
for img_path in glob.glob(os.path.join(data_path, '*.jpg')):
    img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    if img is None:
        # Unreadable or corrupt file: skip it.
        continue

    src_h, src_w = img.shape[:2]
    # Statistics are reported on the unclamped width so the log still shows
    # how wide the data really is.
    natural_w = int(src_w * resize_height / src_h)
    if natural_w > best_width:
        best_width = natural_w
        print(best_width, img_path)
    if natural_w > resize_width:
        width_limit_cnt += 1
        print('limit:' + str(width_limit_cnt), img_path)

    dim = (scaled_width(src_h, src_w, resize_height, resize_width),
           resize_height)
    resized = cv2.resize(to_three_channels(img), dim,
                         interpolation=cv2.INTER_AREA)

    # One random gray level per image for the right-hand padding.
    img = pad_right(resized, resize_width, random.randint(0, 255))

    save_img_path = os.path.join(save_path,
                                 pathlib.Path(img_path).stem + '.jpg')
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(save_img_path, img):
        print('write failed:', save_img_path)