def train(model, train_data, val_data, learning_rate, epochs):
train, val = Dataset(train_data), Dataset(val_data)
train_dataloader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val, batch_size=32)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
criterion = nn.BCEWithLogitsLoss()
optimizer = Adam(model.parameters(), lr= learning_rate)
if use_cuda:
model = model.cuda()
criterion = criterion.cuda()
for epoch_num in range(epochs):
total_acc_train = 0
total_loss_train = 0
for train_input, train_label in tqdm(train_dataloader):
train_label = train_label.to(device)
mask = train_input['attention_mask'].to(device)
input_id = train_input['input_ids'].squeeze(1).to(device)
output = model(input_id, mask)
_, pred = torch.max(output, 1)
#train_label.float()
print(pred.dtype)
print(train_label.dtype)
batch_loss = criterion(pred, train_label.float())
total_loss_train += batch_loss.item()
acc = (output.argmax(dim=1) == train_label).sum().item()
total_acc_train += acc
model.zero_grad()
batch_loss.backward()
optimizer.step()
total_acc_val = 0
total_loss_val = 0
with torch.no_grad():
for val_input, val_label in val_dataloader:
val_label = val_label.to(device)
mask = val_input['attention_mask'].to(device)
input_id = val_input['input_ids'].to(device)
output = model(input_id, mask)
_, pred = torch.max(output, 1)
batch_loss = criterion(pred, val_label.float())
total_loss_val += batch_loss.item()
acc = (output.argmax(dim=1) == val_label).sum().item()
total_acc_val += acc
print(
f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_data): .3f} \
| Train Accuracy: {total_acc_train / len(train_data): .3f} \
| Val Loss: {total_loss_val / len(val_data): .3f} \
| Val Accuracy: {total_acc_val / len(val_data): .3f}')
EPOCHS = 5
model = BertClassifier()
LR = 1e-6
train(model, df_train, df_val, LR, EPOCHS)
Thank You in Advance