背景
找到一个磁共振数据集做训练时,需要从两个文件夹中找出相似的图像对。
思路是:从a文件夹里选定一张图片,遍历b文件夹,找出与它最相似的图片;相似度超过阈值则保存这一对图像。
代码
import os
import cv2
import numpy as np
from skimage.metrics import structural_similarity as ssim
from concurrent.futures import ThreadPoolExecutor, as_completed
def load_images_from_folder(folder):
    """Load every readable image in a folder as a grayscale array.

    Entries are visited in sorted filename order so that the returned dict
    has a reproducible iteration order (``os.listdir`` order is arbitrary).
    Sub-directories are skipped, and files that OpenCV cannot decode are
    ignored.

    Args:
        folder (str): Path of the folder containing the images.

    Returns:
        dict: Maps each filename to the image returned by ``cv2.imread``
        with ``cv2.IMREAD_GRAYSCALE``.
    """
    images = {}
    for filename in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, filename)
        # Only regular files can be images; do not hand directories to OpenCV.
        if not os.path.isfile(img_path):
            continue
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread reports an unreadable file by returning None.
        if img is not None:
            images[filename] = img
    return images
def crop_center(img, crop_fraction=0.6):
    """Return the centered sub-region of an image.

    Args:
        img (ndarray): Input image. The first two axes are treated as height
            and width; any trailing axes (e.g. channels) are kept whole.
        crop_fraction (float): Fraction of the height and of the width to
            keep, in the range (0, 1].

    Returns:
        ndarray: The central region of ``img`` (a slice of the input array).

    Raises:
        ValueError: If ``crop_fraction`` is not in the range (0, 1].
    """
    # A fraction above 1 would produce negative start offsets, which NumPy
    # interprets as "count from the end" and silently returns the wrong area.
    if not 0 < crop_fraction <= 1:
        raise ValueError(
            f"crop_fraction must be in the range (0, 1], got {crop_fraction!r}"
        )
    # Only the first two axes are cropped, so multi-channel images work too.
    h, w = img.shape[:2]
    crop_h, crop_w = int(h * crop_fraction), int(w * crop_fraction)
    start_h, start_w = (h - crop_h) // 2, (w - crop_w) // 2
    return img[start_h:start_h + crop_h, start_w:start_w + crop_w]
def find_best_match(image, image_dict):
    """Find the image in a dict that is most similar to the input image.

    Both the input and every candidate are center-cropped with
    ``crop_center``; the cropped input is resized to the cropped candidate's
    size and the two are compared with SSIM.

    Args:
        image (ndarray): Input image.
        image_dict (dict): Candidate images keyed by filename.

    Returns:
        tuple: ``(filename, ssim_value)`` of the best-scoring candidate.
        ``(None, -1)`` is returned when ``image_dict`` is empty or no
        candidate scores above -1.
    """
    max_ssim = -1
    best_match = None
    # Nothing to compare against: skip cropping the input entirely.
    if not image_dict:
        return best_match, max_ssim

    cropped_image = crop_center(image)
    # The resized input depends only on the target size, so compute it once
    # per distinct size instead of once per candidate.
    resized_by_size = {}
    for filename, img in image_dict.items():
        cropped_img = crop_center(img)
        # cv2.resize takes the target size as (width, height).
        target_size = (cropped_img.shape[1], cropped_img.shape[0])
        resized_image = resized_by_size.get(target_size)
        if resized_image is None:
            resized_image = cv2.resize(cropped_image, target_size)
            resized_by_size[target_size] = resized_image
        current_ssim = ssim(resized_image, cropped_img)
        # Strict comparison: on a tie the earlier candidate is kept.
        if current_ssim > max_ssim:
            max_ssim = current_ssim
            best_match = filename
    return best_match, max_ssim
def skip_black_background_image(image, threshold=0.06):
    """Decide whether an image is mostly black background and should be skipped.

    Args:
        image (ndarray): Input image.
        threshold (float): Minimum fraction of non-black (value > 0) pixels
            an image needs in order to be kept.

    Returns:
        bool: True if the fraction of non-black pixels is below ``threshold``
        (or the image has no pixels at all), otherwise False.
    """
    total_pixels = image.size
    # An empty image carries no content; skip it instead of dividing by zero.
    if total_pixels == 0:
        return True
    non_black_fraction = np.count_nonzero(image > 0) / total_pixels
    return bool(non_black_fraction < threshold)
def process_patient_folder(patient_folder, train_folder, output_folder, similarity_threshold, file_index):
    """Pair each T1w image of one patient with its most similar T1wCE image.

    Images are read from ``<train_folder>/<patient_folder>/T1w`` and
    ``.../T1wCE``. T1w images judged to be mostly black background are
    skipped. For every remaining T1w image whose best T1wCE match reaches
    the similarity threshold, both images are written to
    ``<output_folder>/<patient_folder>`` as ``<index>_T1w.png`` and
    ``<index>_T1wCE.png``.

    Args:
        patient_folder (str): Name of the patient's folder.
        train_folder (str): Root folder of the training data.
        output_folder (str): Root folder for the saved image pairs.
        similarity_threshold (float): Minimum SSIM for a pair to be saved.
        file_index (int): Index used for the first saved pair.

    Returns:
        int: The next unused file index (the input index plus the number of
        saved pairs). Returned unchanged if either sub-folder is missing.
    """
    patient_path = os.path.join(train_folder, patient_folder)
    t1w_folder = os.path.join(patient_path, 'T1w')
    t1wce_folder = os.path.join(patient_path, 'T1wCE')
    if not (os.path.isdir(t1w_folder) and os.path.isdir(t1wce_folder)):
        return file_index

    t1w_images = load_images_from_folder(t1w_folder)
    t1wce_images = load_images_from_folder(t1wce_folder)
    patient_output_folder = os.path.join(output_folder, patient_folder)

    for t1w_filename, t1w_image in t1w_images.items():
        if skip_black_background_image(t1w_image):
            continue
        best_match, max_ssim = find_best_match(t1w_image, t1wce_images)
        # best_match is None when there was no candidate to compare against.
        if best_match is None or max_ssim < similarity_threshold:
            continue
        # Created lazily so patients without any match leave no empty folder;
        # exist_ok avoids a check-then-create race between worker threads.
        os.makedirs(patient_output_folder, exist_ok=True)
        print(f'病人: {patient_folder}, T1w: {t1w_filename}, T1wCE: {best_match}, SSIM: {max_ssim}, 输出文件: {file_index}_T1w.png')
        t1w_output_path = os.path.join(patient_output_folder, f"{file_index}_T1w.png")
        t1wce_output_path = os.path.join(patient_output_folder, f"{file_index}_T1wCE.png")
        cv2.imwrite(t1w_output_path, t1w_image)
        cv2.imwrite(t1wce_output_path, t1wce_images[best_match])
        file_index += 1
    return file_index
# Paths and matching configuration.
train_folder = './train'
output_folder = 'output_folder'
similarity_threshold = 0.75
os.makedirs(output_folder, exist_ok=True)

# Every patient writes into its own sub-folder of output_folder, so each
# task numbers its saved pairs starting from this same index.
file_index = 1

# Sorted so that tasks are submitted in a reproducible order.
patient_folders = sorted(
    f for f in os.listdir(train_folder)
    if os.path.isdir(os.path.join(train_folder, f))
)

with ThreadPoolExecutor(max_workers=6) as executor:
    future_to_patient = {
        executor.submit(
            process_patient_folder,
            patient_folder,
            train_folder,
            output_folder,
            similarity_threshold,
            file_index,
        ): patient_folder
        for patient_folder in patient_folders
    }
    for future in as_completed(future_to_patient):
        patient_folder = future_to_patient[future]
        try:
            # result() re-raises any exception from the worker. The returned
            # per-patient index is deliberately not stored: overwriting
            # file_index here would leave it holding the value of whichever
            # patient happened to finish last.
            future.result()
        except Exception as exc:
            print(f'{patient_folder} 生成异常: {exc}')
        else:
            print(f'{patient_folder} 处理完成。')