Hi there!
I am trying to run a simple CNN2LSTM model and facing this error:
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn.
The strange part is that the current model is a simpler version of my previous model which worked absolutely fine.
To solve this error, I tried setting `requires_grad=True`, for which I modified my target tensors to float(), but that throws another error:
RuntimeError: Expected tensor for argument #1 ‘indices’ to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding).
I am not sure where I am going wrong. Why is `requires_grad` not being set by default?
Any help will be appreciated! Thanks in advance!
# -*- coding: utf-8 -*-
import time
import torch
from preprocessing.preprocess_images import preprocess_images
def train(trg_field, model, batch_size, iterator, optimizer, criterion, device, write_file=False, clip=1.0):
    """Run one training epoch over `iterator` and return the mean batch loss.

    Args:
        trg_field: torchtext-style field exposing `vocab.itos`, used to decode
            index sequences back to tokens when `write_file` is True.
        model: seq2seq model; invoked as model(trg_field, src, trg, True, True, 0.5)
            and expected to return (output, pred).
        batch_size: nominal batch size (shadowed by trg.shape[1] when writing logs).
        iterator: yields batches with `.id` (image file names) and `.mml`
            (integer target tensor, presumably [trg_len, batch] -- TODO confirm).
        optimizer: optimizer over model.parameters().
        criterion: loss taking (output, trg); expects integer class targets,
            so `trg` must remain a Long tensor (do NOT cast it to float --
            embedding lookups require Long/Int indices, and integer tensors
            correctly have requires_grad=False; gradients flow from the
            model parameters, not from the targets).
        device: torch device the batch tensors are moved to.
        write_file: when True, write decoded target/predicted sequences to
            logs/train_targets.txt and logs/train_predicted.txt.
        clip: max gradient norm passed to clip_grad_norm_. New keyword with a
            default -- the original referenced an undefined global `clip`,
            which raised NameError on the first batch.

    Returns:
        float: epoch loss averaged over the number of batches in `iterator`.
    """
    model.train()
    epoch_loss = 0
    # Context managers guarantee the log files are flushed and closed even if
    # an exception interrupts the epoch (the original leaked both handles).
    with open('logs/train_targets.txt', 'w') as trg_seqs, \
         open('logs/train_predicted.txt', 'w') as pred_seqs:
        for i, batch in enumerate(iterator):
            # initialize the hidden state
            #h = model.encoder.init_hidden(batch_size)
            # grab the images and preprocess them
            img_names = batch.id
            src = preprocess_images(img_names, 'data/images/')
            # src is a list of per-image tensors; stack them into one batch tensor
            src = torch.stack(src).to(device)
            # target mml -- kept as an integer tensor (see docstring)
            trg = batch.mml.to(device)
            # setting gradients to zero
            optimizer.zero_grad()
            output, pred = model(trg_field, src, trg, True, True, 0.5)
            # translating and storing trg and pred sequences in batches
            if write_file:
                batch_size = trg.shape[1]
                for idx in range(batch_size):
                    trg_arr = [trg_field.vocab.itos[itrg] for itrg in trg[:, idx]]
                    trg_seqs.write(" ".join(trg_arr) + '\n')
                    pred_arr = [trg_field.vocab.itos[ipred] for ipred in pred[:, idx].int()]
                    pred_seqs.write(" ".join(pred_arr) + '\n')
            # trg    = [trg len, batch size]
            # output = [trg len, batch size, output dim]
            output_dim = output.shape[-1]
            # Drop the initial <sos> step and flatten for the criterion:
            #   output -> [(trg len - 1) * batch size, output dim]
            #   trg    -> [(trg len - 1) * batch size]
            output = output[1:].view(-1, output_dim)
            trg = trg[1:].view(-1)
            loss = criterion(output, trg)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()
            epoch_loss += loss.item()
    return epoch_loss / len(iterator)