I have read this post and understand about root nodes: Grad is None even when requires_grad=True
I have followed its advice but I still get gradient of None
Here is the code I run:
import os
import torch
import torch.tensor as T
from fairseq.models.wav2vec import Wav2VecModel
MODEL = "wav2vec_small.pt"
fname = MODEL
os.system("wget -c https://dl.fbaipublicfiles.com/fairseq/wav2vec/%s" % MODEL)
cp = torch.load(fname)
model = Wav2VecModel.build_model(cp['args'], None)
model.load_state_dict(cp['model'], strict=False)
model.eval()
time = torch.linspace(0.0, 2.5, 64000)
def sine(f0):
return T(0.5) * torch.cos(f0 * 2.0 * 3.14 * time)
def extractor(x):
return model.feature_extractor(torch.tensor(x.view(1, -1)))
def wav2vecdist(x, y):
return torch.mean(torch.abs(extractor(x) - extractor(y)))
ft = T(440.0)
# If you enable GPU, you have to add 'device="cuda"'
with torch.enable_grad():
fp = T(660.0, requires_grad=True)
fp.retain_grad()
dist = torch.mean(torch.abs(extractor(sine(ft)) - extractor(sine(fp))))
print(dist)
dist.backward(retain_graph=False)
print(fp.grad)
As you can see, the extractor in wav2vec is not passing a gradient and I’m not sure why.
A minimal google colab is here: https://colab.research.google.com/drive/1l61syPS7GO7O-S0o8cWnv8R0JwzVq58T?usp=sharing