Hello, I am new to PyTorch and my task is translating a Binary Classification CNN model from Tensorflow into Pytorch. The model was working mostly as expected, but I must have changed something because all of the sudden it is not working at all.
The Loss never meaningfully changes as I train it, just fluctuates very slightly. So I know the weights are updating. But when I check the predictions, everything gets predicted at approximately 0.5, and that does not change.
I am using a custom loss function for Weighted BCE, as I need to penalize the minority class more heavily. I am pretty sure this loss function has something to do with this problem, but I can’t pinpoint what is wrong. Please review my code to see if you can spot what might be wrong. Thanks!
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.embedding = nn.Embedding.from_pretrained(embedding_weights)
self.drop = nn.Dropout(p = .25)
self.conv(nn.ModuleList([nn.Conv1d(64,32,n,1,'same') for n in [3,4,5]])) #applies 3 convolutional layers separately to the embeddings and concatenates for fc layers
self.m = nn.AdaptiveMaxPool1d(1) #replicates TF's GlobalMaxPool
self.fc1 = nn.Linear(263,128)
self.dropout1 = nn.Dropout(p = .2)
self.fc2 = nn.Linear(128, 64)
self.dropout2 = nn.Dropout(p = .2)
self.fc3 = nn.Linear(64, 32)
self.dropout3 = nn.Dropout(p = .2)
self.output = nn.Linear(32,1)
def forward(self, x):
x[0] = self.embedding(x[0])
output = []
for l in self.conv:
output.append(F.relu(self.m(l(self.drop(x[0]))).view(-1,32))) #creates a list with conv output, which will be concatenated
output.append(x[1])
output = torch.cat(output, dim = 1).float()
output = self.dropout1(nn.LeakyReLU(negative_slope = .2)(self.fc1(output)))
output = self.dropout2(nn.LeakyReLU(negative_slope = .2)(self.fc2(output)))
output = self.dropout3(nn.LeakyReLU(negative_slope = .2)(self.fc3(output)))
output = nn.Sigmoid(self.output(output)).view(-1)
return output
def WeightedBCELoss(output, label, weights = None):
output = torch.clamp(output, 1e-7, 1-1e-7)
if weights is not None:
assert len(weights) == 2
loss = weights[1] * (label * torch.log(output)) + weights[0] * ((1-label) * torch.log(1-output))
else:
loss = label * torch.log(output) + (1-label) * torch.log(1-output)
return torch.neg(torch.mean(loss))
Training loop:
class_weights = torch.FloatTensor(sklearn.utils.class_weight.compute_class_weight('balanced', np.unique(y_train), y_train))
num_epochs = 25
batch_size = 128
learning_rate = 0.001
criterion = WeightedBCELoss
optimizer = torch.optim.Adam(net.parameters(), lr = learning_rate)
for epoch in num_epochs:
iterations = 0
iter_loss = 0.0
net.train()
for i, (inputs, labels) in enumerate(loader):
outputs = net(inputs)
loss = criterion(outputs, labels, class_weights)
iter_loss += loss.item()
optimizer.zero_grad()
loss.backward()
optimizer.step()
iterations += 1
if(i +1)%1000 == 0:
print(f'Epoch [{epoch+1}/{num_epochs}], Step[{i+1}/{n_total_steps}], Batch Loss = {loss.item():.4f}, Epoch Loss = {iter_loss/iterations:.4f}')
train_loss.append(iter_loss/iterations)