Hi, Im a torch newbie and attempting to assist a student with an issue they ran, when attempting to run a RNN for a special application.
The error message is:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [50, 5]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead.
Here is a self contained code which should recreate this error. I know the code sucks, it is just for playing around to get the loss and learning running (thats why everything is hard coded atm) + we are new to torch
I added .clone() and stuff everywhere in hopes to get rid of this error, but nothing worked so far. According to the output, the error is thrown on the like " result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,".
import numpy as np
import torch
from torch import autograd, nn
import torchaudio
torch.autograd.set_detect_anomaly(True)
signal_length = 500
input_size = 5
lamb = 0.5
dummy_signal = torch.from_numpy(np.random.rand(1, signal_length).astype(np.float32))
mfcc_comp = torchaudio.transforms.MFCC(sample_rate = 16000, n_mfcc = 20,melkwargs = {'n_mels':60})
def wrapperkwargs(func, kwargs):
return func(**kwargs)
def wrapperargs(func, args):
return func(*args)
class SimpleRNN(nn.Module):
def __init__(self, input_size = input_size, output_size = input_size, unit_type = "LSTM", hidden_size = 50,skip = 1, bias_fl = True, num_layers = 1):
super(SimpleRNN, self).__init__()
self.input_size = input_size
self.output_size = output_size
self.rec = wrapperargs(getattr(nn, unit_type), [input_size, hidden_size, num_layers])
self.lin = nn.Linear(hidden_size, output_size, bias=bias_fl)
self.bias_fl = bias_fl
self.skip = skip
self.save_state = True
self.hidden = None
torch.autograd.set_detect_anomaly(True)
def forward(self, x):
if self.skip:
# save the residual for the skip connection
# print('x',x), print('size x', x.size())
res = x[ :,0:self.skip].clone().detach()
x, self.hidden = self.rec(x.clone(), self.hidden)
return self.lin(x) + res
else:
x, self.hidden = self.rec(x.clone(), self.hidden.clone()).detach()
self.hidden = self.hidden.detach()
return self.lin(x)
# detach hidden state, this resets gradient tracking on the hidden state
def detach_hidden(self):
if self.hidden.__class__ == tuple:
self.hidden = tuple([h.clone().detach() for h in self.hidden])
else:
self.hidden = self.hidden.clone().detach()
# changes the hidden state to None, causing pytorch to create an all-zero hidden state when the rec unit is called
def reset_hidden(self):
self.hidden = None
def my_loss(output,targ, inp):
mfccs_output = mfcc_comp(output)
mfccs_target = mfcc_comp(targ)
mfccs_input = mfcc_comp(inp)
min1 = (mfccs_output[0][:3][:].clone()-mfccs_target[0][:3][:].clone())
min1 = min1**2
min1 = torch.mean(min1,1)
min2 = (mfccs_output[0][12:17][:].clone()-mfccs_input[0][12:17][:].clone())
min2 = min2**2
min2 = torch.mean(min2,1)
loss = lamb*sum(min1)-(1-lamb)*sum(min2)
return 1/loss
model = SimpleRNN()
opt = torch.optim.Adam(model.parameters(), lr = 0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, 'min', factor=0.5, patience=5, verbose=True)
total_loss = 0
for epoch in range(1):
for signal in range(1):
for batch in range(0,2):
input_reshape = torch.reshape(dummy_signal, (100,5))
out_reshape = model(input_reshape.clone())
out = torch.reshape(out_reshape, (1, 500))
opt.zero_grad()
loss = my_loss(out,dummy_signal,dummy_signal)
loss.backward(retain_graph=True)
opt.step()
print(loss)
total_loss += loss.item()
Anyone got an idea what to change to avoid this error? I tried all kinds of solutions I found on the internet, but none has worked so far.
Also, perhaps you know a better way to implement the following:
The RNN is supposed to have an input size N and should process L samples, with L >> N until the loss function is called and the gradient step is performed.Is there a straight forward method to do this other than what we attempted?