Hi,
Currently, I’m facing the issue with cross entropy loss. My target variable is one-hot encoding values such as [0,1,0,…,0] then I would have RuntimeError: Expected floating point type for target with class probabilities, got Long.
On the other hand, if i were to not perform one-hot encoding and input my target variable as is, then i face the issue of RuntimeError: “host_softmax” not implemented for ‘Long’
# MODEL DEFINED
class BertClass(torch.nn.Module):
def __init__(self):
super(BertClass, self).__init__()
self.model = BertModel.from_pretrained('bert-base-uncased')
self.l1 = torch.nn.Linear(768, 512) # linear layer 1
self.l2 = torch.nn.Linear(512, len(lb.classes_)) # num. of classes
self.dropout = torch.nn.Dropout(0.2)
def forward(self, input_ids, attention_mask, token_type_ids):
output = self.model(input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids)
hidden_state = output[0]
output = hidden_state[:, 0]
output = self.l1(torch.nn.ReLU()(output))
output = self.dropout(output)
output = self.l2(output)
return output
# INSTANTIATE MODEL
model = BertClass()
model = model.to(device)
# IMBALANCED DATA
class_weights = class_weight.compute_class_weight(class_weight = 'balanced',
classes = np.unique(available_df['controlId']),
y = available_df['controlId'])
class_weights = torch.tensor(class_weights)
loss_function = torch.nn.CrossEntropyLoss()(weight=class_weights)
optimizer = torch.optim.Adam(params = model.parameters(), lr=LEARNING_RATE)
# TRAIN THE MODEL
def train(epoch):
tr_loss = 0
n_correct = 0
nb_tr_steps = 0
nb_tr_examples = 0
model.train()
for _,data in enumerate(train_loader, 0):
print(data)
ids = data['ids'].to(device)
mask = data['mask'].to(device)
token_type_ids = data['token_type_ids'].to(device)
targets = data['targets'].to(device)
print(targets)
outputs = model(ids, mask, token_type_ids).to(device)
outputs = outputs.long()
print(outputs.size())# torch.Size([8, 325])
print(targets.size()) # with one hot encoding torch.Size([8]). Without one-hot encoding torch.Size([8, 325])
loss = loss_function(outputs, targets.long())
tr_loss += loss.item()
big_val, big_idx = torch.max(outputs.data, dim=1)
n_correct += calculate_accuracy(big_idx, targets)
nb_tr_steps += 1
nb_tr_examples+=targets.size(0)
if _%1000==0:
loss_step = tr_loss/nb_tr_steps
accu_step = (n_correct*100)/nb_tr_examples
print(f"Training Loss per 5000 steps: {loss_step}")
print(f"Training Accuracy per 5000 steps: {accu_step}")
optimizer.zero_grad()
loss.backward()
# # When using GPU
optimizer.step()
print(f'The Total Accuracy for Epoch {epoch}: {(n_correct*100)/nb_tr_examples}')
epoch_loss = tr_loss/nb_tr_steps
epoch_accu = (n_correct*100)/nb_tr_examples
print(f"Training Loss Epoch: {epoch_loss}")
print(f"Training Accuracy Epoch: {epoch_accu}")
return epoch_accu
EPOCHS = 1
for epoch in range(EPOCHS):
train(epoch)