Hi,
When using BinaryCrossEntropy I get better results than when I use CrossEntropy.
Here is the code that I am using:
class BERT(nn.Module):
    """Wrapper around a pretrained ``BertForSequenceClassification`` model.

    Args:
        options_name: Checkpoint name handed to ``from_pretrained``.
            Defaults to ``"bert-base-uncased"``.
        num_labels: Number of output classes of the classification head.
            Defaults to ``3``.
    """

    def __init__(self, options_name="bert-base-uncased", num_labels=3):
        super(BERT, self).__init__()
        self.encoder = BertForSequenceClassification.from_pretrained(
            options_name, num_labels=num_labels
        )
        # Explicitly mark every backbone parameter as trainable so the whole
        # network is fine-tuned, not only the classification head.
        for param in self.encoder.bert.parameters():
            param.requires_grad = True

    def forward(self, text, label=None):
        """Run the encoder and return the first element of its output.

        Args:
            text: Batch of token ids forwarded to the encoder.
            label: Optional targets forwarded as ``labels``. Defaults to
                ``None``.

        Returns:
            ``self.encoder(text, labels=label)[0]``. Per the transformers
            documentation this is the logits when ``label`` is ``None`` and
            the model's internally computed loss when ``label`` is given, so
            callers that apply their own criterion must pass ``None``.
        """
        return self.encoder(text, labels=label)[0]
def train(model,
          optimizer,
          criterion = nn.CrossEntropyLoss(),
          train_loader = train_iter,
          valid_loader = valid_iter,
          num_epochs = 5,
          eval_every = len(train_iter) // 2,
          file_path = destination_folder):
    """Optimise ``model`` on ``train_loader`` for ``num_epochs`` epochs.

    Each batch is cast to ``torch.LongTensor`` and moved to the module-level
    ``device``; the model is called with ``None`` in place of labels and its
    output is scored against the batch labels with ``criterion``.

    Args:
        model: Callable as ``model(inputs, None)``; must expose ``train()``.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()``.
        criterion: Loss called as ``criterion(predictions, labels)``.
        train_loader: Iterable yielding ``(titletext, labels)`` pairs.
        valid_loader: Not used by the code shown here.
        num_epochs: Number of full passes over ``train_loader``.
        eval_every: Not used by the code shown here.
        file_path: Not used by the code shown here.
    """
    # Switch the model to training mode once, before the first epoch.
    model.train()

    for _ in range(num_epochs):
        for batch_text, batch_labels in train_loader:
            # Cast to int64 first, then transfer to the target device.
            targets = batch_labels.type(torch.LongTensor).to(device)
            inputs = batch_text.type(torch.LongTensor).to(device)

            optimizer.zero_grad()
            # Labels are deliberately withheld from the model so that the
            # loss is computed by ``criterion`` rather than inside the model.
            predictions = model(inputs, None)
            batch_loss = criterion(predictions, targets)
            batch_loss.backward()
            optimizer.step()
Any idea what could be wrong?
thank you