Code for model:
class ratingModel(nn.Module):
    """Score a (query, item) pair with one transformer decoder layer.

    The query embeddings attend over the item embeddings (as decoder
    memory) and each query is classified into one of 2 classes,
    returned as log-probabilities.
    """

    def __init__(self, nhead, dim_model, dim_ff):
        """
        Args:
            nhead: number of attention heads in the decoder layer.
            dim_model: embedding dimension of query and item inputs.
            dim_ff: hidden size of the decoder's feed-forward block.
        """
        super(ratingModel, self).__init__()
        self.num_head = nhead
        self.dim_model = dim_model
        self.dim_feedforward = dim_ff
        self.decoder_layer = TransformerDecoderLayer(
            self.dim_model, self.num_head, self.dim_feedforward
        )
        # 2-way classification head on top of the decoder output.
        self.linear_layer = nn.Linear(self.dim_model, 2)

    def forward(self, query_em, item_em):
        """Return per-sample log-probabilities of shape (batch, 2).

        Args:
            query_em: (1, batch, dim_model) query embeddings — the callers
                unsqueeze a length-1 sequence axis (batch_first=False layout).
            item_em: (1, batch, dim_model) item embeddings used as memory.
        """
        dec_out = self.decoder_layer(tgt=query_em, memory=item_em)
        dec_out = dec_out.squeeze(0)  # drop the length-1 sequence axis
        ll_out = self.linear_layer(dec_out)
        # BUG FIX: log_softmax without an explicit `dim` is deprecated and
        # makes PyTorch guess the axis; normalize over the class axis.
        x = F.log_softmax(ll_out, dim=1)
        return x
The code for the training loop looks like this:
EPOCHS = 1
train_step = len(train_dataloader)  # batches per training epoch
val_step = len(val_dataloader)      # batches per validation pass

for epoch in range(EPOCHS):
    # BUG FIX: the model.eval() issued during the previous epoch's
    # validation was never undone, so every epoch after the first would
    # train with dropout disabled. Re-enable training mode explicitly.
    model.train()
    epoch_loss = 0
    correct = 0
    total = 0
    predictions = []
    cnt = 0
    for query_em, item_em, label in train_dataloader:
        cnt += 1
        query_em, item_em, label = (
            query_em.to(device),
            item_em.to(device),
            label.to(device),
        )
        # Add a length-1 sequence axis: (batch, dim) -> (1, batch, dim),
        # matching the model's batch_first=False decoder layout.
        query_em = query_em.unsqueeze(0)
        item_em = item_em.unsqueeze(0)
        output = model(query_em, item_em)
        loss = criterion(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        _, pred = torch.max(output, dim=1)
        predictions.append(pred)
        correct += torch.sum(pred == label).item()
        total += label.size(0)
        if cnt % 1000 == 0:
            # NOTE(review): this running accuracy is averaged over a model
            # that is still changing, so it can differ from a fixed-weights
            # evaluation of the same data after the epoch.
            print(f'training loss: {epoch_loss/cnt}, training_acc:{correct/total}')
    print(f'training loss: {epoch_loss/train_step}, training_acc:{correct/total}')

    # Per-epoch validation pass with gradients and dropout disabled.
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        epoch_loss = 0
        for query_em, item_em, label in val_dataloader:
            query_em, item_em, label = (
                query_em.to(device),
                item_em.to(device),
                label.to(device),
            )
            query_em = query_em.unsqueeze(0)
            item_em = item_em.unsqueeze(0)
            output = model(query_em, item_em)
            loss = criterion(output, label)
            epoch_loss += loss.item()
            _, pred = torch.max(output, dim=1)
            correct += torch.sum(pred == label).item()
            total += label.size(0)
        print(f'validation loss: {epoch_loss/val_step}, validation acc:{correct/total}')
The output of the training loop is pretty promising, as can be seen in the figure below:
However, when I run the inference code below on the same training data:
# Re-evaluate the trained model on the TRAINING data with fixed weights.
predictions_later = []
labels = []
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    epoch_loss = 0
    for query_em, item_em, label in train_dataloader:
        query_em, item_em, label = (
            query_em.to(device),
            item_em.to(device),
            label.to(device),
        )
        # (batch, dim) -> (1, batch, dim) for the batch_first=False decoder.
        query_em = query_em.unsqueeze(0)
        item_em = item_em.unsqueeze(0)
        output = model(query_em, item_em)
        loss = criterion(output, label)
        epoch_loss += loss.item()
        _, pred = torch.max(output, dim=1)
        correct += torch.sum(pred == label).item()
        total += label.size(0)
        predictions_later.append(pred)
        labels.append(label)
    # BUG FIX: this pass iterates train_dataloader, so the mean loss must be
    # normalized by the number of training batches, not val_step — dividing
    # by val_step misreports the loss. The label is corrected accordingly.
    print(f'training-data loss: {epoch_loss/len(train_dataloader)}, training-data acc:{correct/total}')
The result is a much lower accuracy, and moreover the model is predicting every entry to be of class 0.
validation loss: 138.1207146283448, validation acc:0.20694013824686563
I am new to PyTorch, so I might be missing something obvious here. I am using PyTorch '1.9.0' with CUDA '11.1', and I am training the model on a GPU.