Hello all,
I have run into this issue for the first time. The computed loss has requires_grad = False by default, but it should be True, and I have no idea why this is happening. Apart from that, even if I explicitly set requires_grad to True, the model parameters are still not getting updated. Please see the model and training code below. I have checked requires_grad on the model parameters and they are all True.
Regards
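For context, here is a minimal standalone check of what I expected (the linear model and tensor shapes are just placeholders for this check): the loss should come out of the criterion with requires_grad = True and a non-None grad_fn.

import torch
import torch.nn as nn

lin = nn.Linear(4, 2)                    # placeholder model, just for this check
criterion = nn.CrossEntropyLoss()
x = torch.randn(8, 4)                    # dummy batch
y = torch.randint(0, 2, (8,))            # dummy integer class labels
loss = criterion(lin(x), y)
print(loss.requires_grad, loss.grad_fn)  # expected: True and a non-None grad_fn

My actual model and training code are below.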
import sys

import numpy as np
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F  # note: torch.functional does not expose the nn ops
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
class LSTMClassifier(nn.Module):
    def __init__(self):
        super(LSTMClassifier, self).__init__()
        self.lstm = nn.LSTM(1, 64, num_layers=1, batch_first=True)
        self.hidden2out = nn.Linear(64, 2)
        self.dropout_layer = nn.Dropout(p=0.3)

    def forward(self, data):
        # data: (batch, seq_len, 1) since batch_first=True
        outputs, (ht, ct) = self.lstm(data, None)
        output = self.dropout_layer(ht[-1])  # final hidden state of the last layer
        output = self.hidden2out(output)     # logits for the 2 classes
        return output
model = LSTMClassifier().cuda()
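This is how I verified the parameter flags; every parameter prints True:

for name, p in model.named_parameters():
    print(name, p.requires_grad)  # all True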
def train(model, dataloaders, num_epochs, optimizer, patience=None):
    phases = dataloaders.keys()
    criterion = nn.CrossEntropyLoss().cuda()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if patience is not None:
        # EarlyStopping is an external helper class I use (defined elsewhere); it
        # checkpoints the model and sets .early_stop when the val loss stops improving
        earlystop = EarlyStopping(patience=patience, verbose=True)
    stop = False
    for epoch in range(num_epochs):
        print('Epoch:', epoch)
        for phase in phases:
            epoch_metrics = {"loss": [], "acc": []}  # reset per phase so val stats are not mixed with train stats
            if phase == 'train':
                model.train()
            else:
                model.eval()
            for batch_idx, (data, target) in enumerate(dataloaders[phase]):
                # Variable is deprecated; tensors carry autograd state directly
                data = data.type(torch.FloatTensor).to(device)
                target = target.type(torch.LongTensor).to(device)
                optimizer.zero_grad()
                # my attempted workaround: forcing the flags by hand
                # (normally none of these assignments should be necessary)
                data.requires_grad = True
                output = model(data)
                output.requires_grad = True
                loss = criterion(output, target)
                loss.requires_grad = True
                acc = 100 * (output.detach().argmax(1) == target).cpu().numpy().mean()
                epoch_metrics["loss"].append(loss.item())
                epoch_metrics["acc"].append(acc)
                sys.stdout.write(
                    "\r[Epoch %d/%d] [Batch %d/%d] [Loss: %f (%f), Acc: %.2f%% (%.2f%%)]"
                    % (
                        epoch,
                        num_epochs,
                        batch_idx,
                        len(dataloaders[phase]),
                        loss.item(),
                        np.mean(epoch_metrics["loss"]),
                        acc,
                        np.mean(epoch_metrics["acc"]),
                    )
                )
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
            epoch_acc = np.mean(epoch_metrics["acc"])
            epoch_loss = np.mean(epoch_metrics["loss"])
            if phase == 'val' and patience is not None:
                earlystop(epoch_loss, model)
                if earlystop.early_stop:
                    print("Early stopping")
                    model.load_state_dict(torch.load('./checkpoint.pt'))
                    stop = True  # flag so we also exit the epoch loop, not just the phase loop
            print('{} Accuracy: {}'.format(phase, epoch_acc.item()))
            if stop:
                break
        if stop:
            break
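For completeness, this is roughly how I call it (the Adam optimizer, learning rate, epoch count, and patience below are placeholders; dataloaders is a dict with 'train' and 'val' DataLoaders):

optimizer = optim.Adam(model.parameters(), lr=1e-3)  # placeholder optimizer/lr
train(model, dataloaders, num_epochs=10, optimizer=optimizer, patience=5)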