I Have implemented an Autoencoder that takes in a Tweet and the Target, concatenates the final hidden state after passing them through encoder, and reconstructs as well as classifies them into 3 classes.
But the loss is not changing at all and is constant at 17.226, pls help?
EDIT : so the loss is changing but it’s fluctuating betwen 17.223 - 17.235 and that too very, slowly. what can i do to solve/find the problem?
src1 = trg1 = tweet
src2 = trg2 = target
class Seq2Seq(nn.Module):
def __init__(self,encoder,decoder, device):
super().__init__()
self.decoder = decoder
self.encoder = encoder
self.device = device
hid_dim1 = encoder.hid_dim
hid_dim2 = decoder.hid_dim
self.linear = nn.Linear(hid_dim1*2, hid_dim2)
self.out = nn.Linear(hid_dim2,3)
def forward(self,src1,trg1,src2,trg2,teacher_forcing_ratio = 0.5):
batch_size = trg1.shape[1]
max_len = trg1.shape[0]
target_voc_size = self.decoder.output_dim
outputs = torch.zeros(max_len, batch_size, target_voc_size).to(self.device)
context1 = self.encoder(src1)
hidden1 = context1
input = trg1[0,:]
max_len2 = trg2.shape[0]
outputs2 = torch.zeros(max_len2,batch_size,target_voc_size).to(self.device)
context2 = self.encoder(src2)
hidden2 = context2
temp = torch.cat((hidden1,hidden2), dim = 2)
hidden = self.linear(temp)
context = hidden
for t in range(1,max_len):
#print(t)
output, hidden = self.decoder(input, hidden, context)
outputs[t] = output
teacher_force = random.random() < teacher_forcing_ratio
top1 = output.max(1)[1]
input = (trg1[t] if teacher_force else top1)
input2 = trg2[0,:]
for t in range(1, max_len2):
output, hidden = self.decoder(input2, hidden, context)
outputs2[t] = output
teacher_force = random.random() < teacher_forcing_ratio
input2 = output.max(1)[1]
out = F.relu(self.out(context))
return outputs, outputs2, out
and i am implementing the criterion as
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
criterion2 = nn.BCEWithLogitsLoss()
while I am implementing the loss as follows -
lossA = criterion(output1[1:].view(-1, output1.shape[2]), trg[1:].view(-1))
lossB = criterion(output2[1:].view(-1, output2.shape[2]), trg2[1:].view(-1))
lossC = criterion2(y,output3)
loss = lossA + lossB + lossC
loss.backward()
Where could be the problem?