I have a binary classification problem that I'm treating as logistic regression. The input of the linear layer is 1-dimensional.
During training, the loss on the training set decreases, but the model keeps predicting 0 for every data sample. I checked the output: the first element of the 2-dimensional output is far larger than the second one, and this doesn't change after training. How can I resolve this?
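For context, this is roughly how I inspected the output for a single pair (a minimal sketch; it assumes the model and the string_to_index / word2index helpers shown below):

with torch.no_grad():
    outputs = model(string_to_index(pairs[0][0], word2index),
                    string_to_index(pairs[0][1], word2index))
    print(outputs)                          # first logit is much larger than the second
    print(torch.softmax(outputs, dim=-1))   # probability mass concentrated on class 0
    print(outputs.argmax(dim=-1))           # always predicts 0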
import torch
import torch.nn as nn

class ERModeler(nn.Module):
    def __init__(self, vocab_size, embedding_dim, weInput):
        super(ERModeler, self).__init__()
        # Frozen pretrained word embeddings
        self.embeddings = nn.Embedding.from_pretrained(weInput)
        # One learnable scalar weight per word, initialized to 1
        self.weight_embeddings = nn.Embedding(vocab_size, 1, scale_grad_by_freq=True)
        self.weight_embeddings.weight.data.fill_(1)
        self.linear = nn.Linear(1, 2)

    def forward(self, input1, input2):
        # Word vector look-up
        embeds1 = self.embeddings(input1)
        embeds2 = self.embeddings(input2)
        # Apply the per-word weights
        embeds1 = embeds1 * self.weight_embeddings(input1)
        embeds2 = embeds2 * self.weight_embeddings(input2)
        # Average over the sequence dimension
        embeds1 = embeds1.mean(0)
        embeds2 = embeds2.mean(0)
        # Cosine similarity between the two sentence vectors
        out = embeds1.dot(embeds2) / (embeds1.norm() * embeds2.norm()).view(-1, 1)
        # Linear layer: 1-dimensional similarity -> 2 logits
        out = self.linear(out)
        return out
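As a quick sanity check that the forward pass returns a (1, 2) logit tensor, something like this can be run separately from the training script (toy sizes and a random stand-in for my pretrained matrix, purely illustrative):

VOC_SIZE, EMBEDDING_DIM = 100, 50               # hypothetical toy sizes
weInput = torch.randn(VOC_SIZE, EMBEDDING_DIM)  # stand-in for the real pretrained vectors
toy_model = ERModeler(VOC_SIZE, EMBEDDING_DIM, weInput)
idx1 = torch.tensor([1, 2, 3])                  # toy word-index sequences
idx2 = torch.tensor([4, 5])
print(toy_model(idx1, idx2).shape)              # torch.Size([1, 2])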
model = ERModeler(VOC_SIZE, EMBEDDING_DIM, weInput)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.0001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

iteration = 0
for epoch in range(10):
    for i in range(len(pairs)):
        # Load the sentence pair and its label
        s1 = pairs[i][0]
        s2 = pairs[i][1]
        l = labels[i]
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        # Forward pass to get the logits
        outputs = model(string_to_index(s1, word2index),
                        string_to_index(s2, word2index))
        # Calculate loss: softmax --> cross-entropy loss
        loss = criterion(outputs, l)
        # Compute gradients w.r.t. parameters
        loss.backward()
        # Update parameters
        optimizer.step()

        iteration += 1
        if iteration % 1000 == 0:
            # Calculate accuracy over the whole dataset
            correct = 0
            total = 0
            loss_sum = 0
            for j in range(len(pairs)):
                # Load the sentence pair and its label
                s1 = pairs[j][0]
                s2 = pairs[j][1]
                l = labels[j]
                # Forward pass only, to get the logits
                outputs = model(string_to_index(s1, word2index),
                                string_to_index(s2, word2index))
                loss_sum += criterion(outputs, l)
                # Prediction = index of the larger logit
                _, predicted = torch.max(outputs.data, 1)
                # Total number of labels
                total += 1
                # Total correct predictions
                correct += (predicted == l).item()
            accuracy = 100 * correct / total
            print(loss_sum.item())
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))