0
点赞
收藏
分享

微信扫一扫

批量将中文单个字音频转为图像(二维矩阵)


import numpy as np
import librosa
import cv2
import matplotlib.pyplot as plt
import os
import pypinyin
import torch
# Batch-convert single-character Mandarin audio clips into square "ring"
# images and store them, with pinyin class labels, in one torch file.

# Directory holding one audio file per character; the first character of each
# file name is the Chinese character being pronounced.  The same directory is
# used for listing and for loading so the two can never disagree.
AUDIO_DIR = "2500中文常用字发音"
OUTPUT_PATH = "train_audio_to_png.pth"
SAMPLE_RATE = 16000
# Sample window [CLIP_START, CLIP_STOP) taken from every clip.
CLIP_START = 1000
CLIP_STOP = 4000
# (width, height) handed to cv2.resize.
IMAGE_SIZE = (128, 128)


def ring_image_from_samples(samples) -> np.ndarray:
    """Paint a 1-D signal as concentric square rings.

    The signal is mirrored (reversed copy followed by the original) to give a
    palindromic sequence of length ``2 * len(samples)``.  A square float64
    matrix with that side length is then filled so that every cell lying
    ``d`` cells away from the nearest border holds the ``d``-th value of the
    mirrored sequence: the outermost ring shows the last sample and the
    central 2x2 square shows the first one.

    Args:
        samples: non-empty 1-D array-like of numeric samples.

    Returns:
        A ``(2 * n, 2 * n)`` float64 array, where ``n = len(samples)``.

    Raises:
        ValueError: if ``samples`` is empty or not one-dimensional.
    """
    samples = np.asarray(samples)
    if samples.ndim != 1 or samples.size == 0:
        raise ValueError("samples must be a non-empty 1-D sequence")

    mirrored = np.hstack([samples[::-1], samples])
    side = mirrored.size
    canvas = np.zeros((side, side))

    # Explicit stop indices (instead of negative slices) let ring 0 and the
    # central 2x2 square go through the same code path as every other ring.
    for depth in range(side // 2):
        value = mirrored[depth]
        stop = side - depth
        canvas[depth:stop, depth] = value       # left edge
        canvas[depth:stop, stop - 1] = value    # right edge
        canvas[depth, depth:stop] = value       # top edge
        canvas[stop - 1, depth:stop] = value    # bottom edge
    return canvas


def main(audio_dir=AUDIO_DIR, output_path=OUTPUT_PATH):
    """Build the image/label dataset from ``audio_dir`` and save it.

    The saved object is a dict with two keys:
      * ``"data"``: list of ``[image, class_index]`` pairs, one per file, in
        ``os.listdir`` order; ``image`` is the ring image resized to
        ``IMAGE_SIZE``.
      * ``"voc"``: sorted list of the distinct pinyin labels; ``class_index``
        indexes into it.

    Args:
        audio_dir: directory containing the audio clips.
        output_path: destination passed to ``torch.save``.

    Raises:
        ValueError: if a clip has no samples inside the configured window.
    """
    file_name_list = os.listdir(audio_dir)

    # The label of a file is the pinyin of the first character of its name.
    labels = [pypinyin.pinyin(one_name[:1])[0][0] for one_name in file_name_list]
    label_class_list = sorted(set(labels))
    class_index = {label: idx for idx, label in enumerate(label_class_list)}

    input_lable_list = []
    for one_name, label in zip(file_name_list, labels):
        print(one_name)

        # `sr` must be passed by keyword: recent librosa makes it keyword-only.
        audio, _ = librosa.load(os.path.join(audio_dir, one_name), sr=SAMPLE_RATE)
        mask = ring_image_from_samples(audio[CLIP_START:CLIP_STOP])

        # To preview an image: plt.imshow(cv2.resize(mask, IMAGE_SIZE)); plt.show()
        input_lable_list.append([cv2.resize(mask, IMAGE_SIZE), class_index[label]])

    torch.save({"data": input_lable_list, "voc": label_class_list}, output_path)


if __name__ == '__main__':
    main()



举报

相关推荐

0 条评论