Hello, I’m trying to write a function that applies random augmentations to audio files, which has been converted to pytorch tensors in a prior operation. The problem is, the code I wrote runs really slow, I have located the culprit to be the “s” within x,_ = AF.speed(x, self.sr, s). If I change the “s” to a constant like 1.3 then the code runs swiftly with no problem. Has anyone encountered the same problem before? Does anyone know what the problem is? Or does anyone knows a better way to apply random audio augmentations? Any suggestions will help, thank you!
import torch as T
import torchvision.transforms as VT
import torchaudio.transforms as AT
class myAudioTransforms:
def init(self, sample_rate:int, len:float, speed:tuple, noise:float):
self.sr = sample_rate
self.speedgap = speed[1]-speed[0]
self.speedmin = speed[0]
self.spectrogram = AT.MelSpectrogram(sample_rate, 400, n_mels=96)
self.resize = VT.Resize((96,96))
def get_params(self):
s = random.random()*self.speedgap + self.speedmin
s = round(s*10)/10
return s
def __call__(self, x):
x = x.to(T.float32).unsqueeze(0)
s = self.get_params()
x,_ = AF.speed(x, self.sr, s) # adjust speed
spec = self.spectrogram(x)
spec = self.resize(spec)
return spec