I am facing the error below after I was told to set retain_graph=True in loss.backward(). Here is the error:
one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [100, 400]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
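From what I can tell, this message appears when a tensor that autograd saved for the backward pass gets modified in place before a backward pass over the retained graph runs. Here is a minimal sketch that reproduces the same class of error for me (the names are illustrative, not from my code):

import torch

w = torch.randn(3, 3, requires_grad=True)
x = torch.randn(3, requires_grad=True)
loss = (w @ x).sum()
loss.backward(retain_graph=True)   # first backward keeps the graph alive
with torch.no_grad():
    w += 1.0                       # in-place update, analogous to optimizer.step()
loss.backward()                    # RuntimeError: ... modified by an inplace operation

In my training loop the in-place modification is presumably optimizer.step() updating the weights, but I am not sure how to avoid it.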
Here is my model architecture:
import torch
import torch.nn as nn
import torch.nn.functional as F

def create_emb_layer(weights_matrix, non_trainable=False):
    weights_matrix = torch.tensor(weights_matrix)
    # vocabulary size and GloVe dimension are fixed for my dataset
    num_embeddings, embedding_dim = 16404, 50
    emb_layer = nn.Embedding(num_embeddings, embedding_dim)
    emb_layer.load_state_dict({'weight': weights_matrix})
    if non_trainable:
        emb_layer.weight.requires_grad = False
    return emb_layer, num_embeddings, embedding_dim
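For completeness, this is roughly how I build the layer; the random matrix below is only a stand-in for my real 16404x50 GloVe matrix:

import numpy as np

# stand-in for the real pre-trained GloVe matrix (16404 words x 50 dims)
weights_matrix = np.random.rand(16404, 50).astype(np.float32)

emb_layer, num_embeddings, embedding_dim = create_emb_layer(weights_matrix, non_trainable=True)
print(emb_layer.weight.requires_grad)  # False, since the embeddings are frozen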
class Sentiglove(nn.Module):
    def __init__(self, weights_matrix):
        super().__init__()
        self.embedding, num_embeddings, embedding_dim = create_emb_layer(weights_matrix, True)
        self.conv1 = nn.Conv1d(in_channels=48, out_channels=32, kernel_size=3, stride=1)
        self.lstm = nn.LSTM(input_size=48, hidden_size=100, bidirectional=True, batch_first=True)
        self.dropout = nn.Dropout(0.3)
        #self.fc = nn.Linear(200, 5)
        self.fc = nn.Linear(6400, 5)
        # initial (h_0, c_0): shape (num_layers * num_directions, batch, hidden_size)
        if torch.cuda.is_available():
            self.hidden = (torch.zeros(2, 50, 100).cuda(), torch.zeros(2, 50, 100).cuda())
        else:
            self.hidden = (torch.zeros(2, 50, 100), torch.zeros(2, 50, 100))

    def forward(self, x):
        x = self.embedding(x)
        x = F.relu(self.conv1(x))
        # the hidden state is carried over from one forward call to the next
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        lstm_out = self.dropout(lstm_out)
        lstm_out = lstm_out.contiguous().view(50, -1)
        out = F.relu(self.fc(lstm_out))
        return F.log_softmax(out, dim=1)
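For what it is worth, a single forward pass runs fine; here is a quick shape check with random indices (batch size 50 and sequence length 48, matching the sizes hard-coded above):

model = Sentiglove(weights_matrix)
x = torch.randint(0, 16404, (50, 48))  # 50 sequences of 48 token indices
out = model(x)
print(out.shape)  # torch.Size([50, 5])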
Here is the training code:
import time

start = time.time()
epochs = 1
val_correct = []
val_loss = []
train_correct = []
train_loss = []

for i in range(epochs):
    trn_corr = 0
    trn_loss = 0
    for b, (x_train, y_train) in enumerate(train_loader):
        with torch.autograd.set_detect_anomaly(True):
            b += 1
            #y_pred = model(x_train.cuda())
            y_pred = model(x_train)
            #y_train = y_train.cuda()
            loss = criterion(y_pred, y_train)
            # indices of the highest score in each row, i.e. the predicted class
            predicted = torch.max(y_pred.data, 1)[1]
            trn_corr += (predicted == y_train).sum()
            optimizer.zero_grad()
            loss.backward(retain_graph=True)
            optimizer.step()
            if b % 100 == 0:
                print(f"Epoch: {i} batch: {b} loss {loss.item()} accuracy {trn_corr.item()/(b*50)}")
    train_loss.append(loss)
    train_correct.append(trn_corr)

print(time.time() - start)
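From what I have read since, the usual suggestion is not to retain the graph at all but to detach the hidden state at the start of every batch, so that the current graph no longer reaches back into previous batches (whose weights optimizer.step() has already modified in place). Is something like this sketch what is intended?

for b, (x_train, y_train) in enumerate(train_loader):
    # cut the autograd history so this batch's graph does not
    # reference tensors from previous batches
    model.hidden = tuple(h.detach() for h in model.hidden)
    y_pred = model(x_train)
    loss = criterion(y_pred, y_train)
    optimizer.zero_grad()
    loss.backward()   # no retain_graph needed once hidden is detached
    optimizer.step()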
I hope to get a quick reply, as this is needed for my research project.