Model Code
class BertClassifier(nn.Module):
    """Binary classifier head on top of BERT.

    Pipeline: pooled [CLS] output of bert-base-cased (768-dim) -> dropout ->
    single-unit linear layer -> sigmoid, producing a probability in (0, 1)
    per example, suitable for ``nn.BCELoss``.
    """

    def __init__(self, dropout=0.5):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-cased', from_tf=False)
        self.dropout = nn.Dropout(dropout)
        # bert-base pooled output is 768-dim; one logit for binary classification.
        self.linear = nn.Linear(768, 1)
        # FIX: this layer was misleadingly named `relu` although it is a Sigmoid.
        # Renamed to `sigmoid`; `relu` is kept as an alias so any external code
        # (or a loaded checkpoint referencing the attribute) still works.
        self.sigmoid = nn.Sigmoid()
        self.relu = self.sigmoid  # backward-compatible alias

    def forward(self, input_id, mask):
        """Return sigmoid probabilities of shape (batch, 1).

        Args:
            input_id: token-id tensor for the batch.
            mask: attention-mask tensor for the batch.
        """
        _, pooled_output = self.bert(input_ids=input_id, attention_mask=mask,
                                     return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        return self.sigmoid(linear_output)
DataLoading
# Wrap the train/validation dataframes and build batched loaders.
# FIX: the originals were named `train`/`val`, which the later `def train(...)`
# shadows; renamed to `train_ds`/`val_ds` (they are only used on these lines).
train_ds, val_ds = Dataset(df_train), Dataset(df_val)
train_dataloader = torch.utils.data.DataLoader(train_ds, batch_size=32, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_ds, batch_size=32)
Train method
def train(model, train_data, val_data, learning_rate, epochs):
    """Fine-tune `model` with BCE loss and report per-epoch loss/accuracy.

    Args:
        model: BertClassifier producing sigmoid probabilities of shape (batch, 1).
        train_data: DataLoader yielding (inputs, labels) training batches, where
            inputs is a dict with 'input_ids' and 'attention_mask'.
        val_data: DataLoader yielding validation batches of the same shape.
        learning_rate: Adam learning rate.
        epochs: number of full passes over train_data.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    criterion = nn.BCELoss()
    optimizer = Adam(model.parameters(), lr=learning_rate)

    # FIX: count *examples*, not batches — len(dataloader) is the number of
    # batches, len(dataloader.dataset) is the number of examples. This replaces
    # the hardcoded trainlen=24000 / vallen=3000.
    trainlen = len(train_data.dataset)
    vallen = len(val_data.dataset)

    if use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    for epoch_num in range(epochs):
        total_acc_train = 0
        total_loss_train = 0

        for train_input, train_label in tqdm(train_data):
            train_label = train_label.to(device)
            mask = train_input['attention_mask'].to(device)
            input_id = train_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, mask)  # (batch, 1) probabilities
            batch_loss = criterion(output.float(), train_label.float().unsqueeze(1))
            total_loss_train += batch_loss.item()

            # FIX: output has a single column, so output.argmax(dim=1) was
            # always 0 and "accuracy" just counted label==0 examples.
            # Threshold the sigmoid probability at 0.5 instead.
            preds = (output >= 0.5).long().squeeze(1)
            total_acc_train += (preds == train_label).sum().item()

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        total_acc_val = 0
        total_loss_val = 0
        with torch.no_grad():
            # FIX: iterate the val_data argument, not the global val_dataloader
            # the original body reached for (which ignored the parameter).
            for val_input, val_label in val_data:
                val_label = val_label.to(device)
                mask = val_input['attention_mask'].to(device)
                input_id = val_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask)
                batch_loss = criterion(output.float(), val_label.float().unsqueeze(1))
                total_loss_val += batch_loss.item()

                # Same thresholded prediction as in the training loop.
                preds = (output >= 0.5).long().squeeze(1)
                total_acc_val += (preds == val_label).sum().item()

        print(
            f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / int(trainlen): .3f} \
            | Train Accuracy: {total_acc_train / int(trainlen): .3f} \
            | {total_acc_train} and {trainlen}\
            | Val Loss: {total_loss_val / int(vallen): .3f} \
            | Val Accuracy: {total_acc_val / int(vallen): .3f}\
            | {total_acc_val} and {vallen}'
        )
# Training hyperparameters and entry point.
EPOCHS = 5
model = BertClassifier()
# NOTE(review): 1e-3 is very high for fine-tuning BERT (2e-5..5e-5 is the
# usual range); confirm training converges rather than collapsing to one class.
LR = 0.001
train(model, train_dataloader, val_dataloader, LR, EPOCHS)
The reason I track `trainlen` separately when computing accuracy is that `len(train_dataloader)` returns the number of batches, not the number of examples; accuracy must be divided by the example count (`len(train_dataloader.dataset)`).