设计思路图
import torch
import torchaudio
import matplotlib.pyplot as plt
import numpy
from moviepy.editor import VideoFileClip,AudioFileClip,AudioClip
# Coarse splitter: cut "sss.wav" into clips at the points where a heavily
# downsampled "falling sample" mask switches between empty and non-empty,
# keeping only clips that are at least 2000 ms long.
from pydub import AudioSegment
import time  # not used by this script; kept in case other code in the file relies on it


def _sum_adjacent_pairs(values):
    """Return an array holding the sum of each adjacent pair in *values*.

    A trailing unpaired element (odd length) is dropped, so the result has
    ``len(values) // 2`` entries.
    """
    usable = len(values) - len(values) % 2
    # Explicit row count (instead of -1) so an empty input reshapes cleanly.
    return torch.sum(torch.Tensor(values)[:usable].view(usable // 2, 2), dim=1).numpy()


def _bin_to_ms(bin_index, total_samples, total_bins, rate):
    """Map an index in the downsampled mask to a position in milliseconds.

    The index is first scaled back to a sample index, then converted to
    milliseconds. Multiplying by 1000 *before* dividing by the sample rate
    keeps sub-second precision.
    """
    sample_index = int(bin_index) * total_samples // total_bins
    return sample_index * 1000 // rate


file_name = r"sss.wav"
waveform, sample_rate = torchaudio.load(file_name)

# Difference between each sample of channel 0 and its successor, reduced to a
# two-level mask: 10 where the signal falls, 0 where it rises or stays flat.
data = waveform[0][:-1] - waveform[0][1:]
data1 = data.numpy().copy()
data1[data1 > 0] = 10
data1[data1 < 0] = 0

# Halve the resolution once, then 6 more times for sample rates above 16 kHz
# or 3 more times otherwise; every pass sums adjacent pairs.
halving_passes = 1 + (6 if sample_rate > 16000 else 3)
data2 = data1
for _ in range(halving_passes):
    data2 = _sum_adjacent_pairs(data2)

# Indices where the mask flips between "some falling samples" and "none".
active = data2 > 0
not_zero = numpy.nonzero(active[1:] ^ active[:-1])
boundaries = not_zero[0]

sound = AudioSegment.from_wav(file_name)

# Without any transition there is nothing to cut (and no first boundary to
# start from), so the export loop is skipped entirely.
if len(boundaries) > 0:
    start = _bin_to_ms(boundaries[0], len(data), len(data2), sample_rate)
    for one in boundaries[1:]:
        end = _bin_to_ms(one, len(data), len(data2), sample_rate)
        if end - start < 2000:
            # Too short: keep extending the current clip to the next boundary.
            continue
        print(end - start)
        word = sound[start:end]  # pydub slices are expressed in milliseconds
        word.export(str(one) + '.wav', format="wav", tags={'artist': 'AppLeU0', 'album':".wav"})
        start = end

if __name__ == '__main__':
    pass
从长音频中精确分割并筛选音频片段
import torchaudio
import numpy
# Precise splitter: locate candidate clips in "untitledxxx.wav" from a 0/1
# "rising sample" mask, trim sparse windows off both ends of each candidate,
# and export the candidates whose remaining mask is dense enough.
from pydub import AudioSegment

filename = "untitledxxx.wav"
waveform, sample_rate = torchaudio.load(filename)
print("Shape of waveform:{}".format(waveform.size()))  # size of the loaded audio tensor
print("sample rate of waveform:{}".format(sample_rate))  # sampling rate

# First difference of channel 0, reduced to a mask: 1 where the signal rises,
# 0 where it falls or stays flat.
data = waveform[0][1:] - waveform[0][:-1]
data1 = data.numpy().copy()
data1[data1 > 0] = 1
data1[data1 < 0] = 0

# Consecutive positions of zeros in the mask and the distance between them;
# a distance other than 1 means the two zeros are not adjacent.
zero_positions = numpy.nonzero(data1 == 0)[0]
y = zero_positions[1:]
x = zero_positions[:-1]
y_x = y - x

# Number of samples in 4 ms (a float); its integer part is the trim window.
gt_4ms = sample_rate * 4 / 1000
# Clamp to 1 so a degenerate sample rate cannot produce a zero-width window
# that would never make progress while trimming.
window = max(1, int(gt_4ms))
min_window_ones = 0.3 * gt_4ms

sound = AudioSegment.from_wav(filename)

# Non-adjacent zero pairs ("breaks"), then the consecutive breaks that are
# more than 4 ms worth of entries apart. Those are used as clip separators.
breaks = numpy.nonzero(y_x != 1)[0]
y1 = breaks[1:]
x1 = breaks[:-1]
y_x1 = numpy.nonzero((y1 - x1) > gt_4ms)[0]

for i, (current, following) in enumerate(zip(y_x1[:-1], y_x1[1:])):
    s = y[y1[current]]
    e = x[x1[following]]
    segment = data1[s:e]

    # Require more than 300 * gt_4ms rising samples (1.2 s worth of samples).
    if numpy.count_nonzero(segment == 1) <= 300 * gt_4ms:
        continue

    lo, hi = 0, len(segment)

    # Drop leading windows that contain too few rising samples. The lo < hi
    # bound stops the loop once everything has been trimmed away.
    inde_x_s = 0
    while lo < hi and segment[lo:lo + window].sum() < min_window_ones:
        inde_x_s += 1
        lo += window

    # Same trimming from the end, looking at the last window of what is left.
    inde_x_e = 0
    while lo < hi and segment[max(lo, hi - window):hi].sum() < min_window_ones:
        inde_x_e += 1
        hi -= window

    if lo >= hi:
        # Nothing survived trimming; there is no clip to evaluate.
        continue

    trimmed = segment[lo:hi]
    # Keep the clip only when more than 35% of its samples are rising.
    if trimmed.mean(dtype=numpy.float64) > 0.35:
        first_sample = s + inde_x_s * window
        last_sample = e - inde_x_e * window
        print(inde_x_s, inde_x_e)
        print(numpy.count_nonzero(trimmed == 0), numpy.count_nonzero(trimmed == 1))
        # pydub slices are in milliseconds: samples * 1000 / sample_rate.
        word = sound[first_sample * 1000 / sample_rate:last_sample * 1000 / sample_rate]
        word.export(str(i) + '.wav', format="wav", tags={'artist': 'AppLeU0', 'album':".wav"})

if __name__ == '__main__':
    pass