I am trying to solve an NLP problem where we have to decide whether two sentences (a premise and a hypothesis) are unrelated, related, or contradictory.
I am using a pretrained BERT model from Hugging Face.
Here is my model:
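For context, these are roughly the imports the snippets below rely on (I may have a few extras in my notebook):

import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset
from transformers import AutoTokenizer, AdamW, get_linear_schedule_with_warmup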
class BertModel(nn.Module):
    def __init__(self, model):
        super(BertModel, self).__init__()
        self.bert_model = model
        self.dropout = nn.Dropout(0.3)
        # mean-pool and max-pool (768 each) are concatenated, hence 768*2 -> 3 classes
        self.linear = nn.Linear(768 * 2, 3)

    def forward(self, xb):
        # o1: last hidden states, shape (batch, seq_len, 768)
        o1, _ = self.bert_model(xb)
        apool = torch.mean(o1, 1)       # average pooling over tokens
        mpool, _ = torch.max(o1, 1)     # max pooling over tokens
        cat = torch.cat((apool, mpool), 1)
        x = self.dropout(cat)
        return self.linear(x)
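For reference, mx in the training code further down is an instance of this wrapper. I build it roughly like this (assuming MODEL_PATH points to a bert-base checkpoint such as 'bert-base-uncased', and a transformers version where the model call returns a tuple):

from transformers import AutoModel

bert = AutoModel.from_pretrained(MODEL_PATH)   # MODEL_PATH e.g. 'bert-base-uncased' (assumption)
mx = BertModel(bert)

# quick shape check with a dummy batch of token ids
dummy = torch.randint(0, 1000, (2, 100))       # (batch, seq_len)
print(mx(dummy).shape)                         # expect torch.Size([2, 3])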
This is my dataset class, which the dataloader is built from:
class DataTokenizer(Dataset):
    def __init__(self, data, model_path, max_len, text_transform=None, one_hot=False):
        self.max_len = max_len
        # each item is a [premise, hypothesis] pair
        self.text = data[['premise', 'hypothesis']].values.tolist()
        self.text_transform = text_transform
        if one_hot:
            labels = pd.get_dummies(data.label)
        else:
            labels = data.label
        self.labels = labels.values.tolist()
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

    def get_tokens(self, text):
        encode = self.tokenizer.encode_plus(
            text,
            max_length=self.max_len,
            pad_to_max_length=True
        )
        return encode['input_ids']

    def __len__(self):
        return len(self.text)

    def __getitem__(self, idx):
        text = self.text[idx]
        tokens = self.get_tokens(text)
        tokens = torch.tensor(tokens)
        label = self.labels[idx]
        label = torch.tensor(label, dtype=torch.float)
        return tokens, label
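To sanity-check the dataset I look at a single item, roughly like this (train_data is the pandas DataFrame with 'premise', 'hypothesis' and 'label' columns; MODEL_PATH as above):

ds = DataTokenizer(train_data, MODEL_PATH, max_len=100)
tokens, label = ds[0]
print(tokens.shape)   # should be torch.Size([100]) once padding/truncation to max_len applies
print(label)          # e.g. tensor(0.)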
And here is my training loop:
def run():
    losses = list()
    loss_fn = nn.CrossEntropyLoss()

    def train_loop(train_loader, model, optimizer, loss_fn, device, scheduler=None):
        model.train()
        for i, batch in enumerate(train_loader):
            # getting the data from the data loader
            input_tokens, labels = batch
            # putting the data on the device
            input_tokens = input_tokens.to(device, dtype=torch.long)
            labels = labels.to(device, dtype=torch.long)
            optimizer.zero_grad()
            output = model(input_tokens)
            loss = loss_fn(output, labels)
            losses.append(loss)
            if i % 10 == 0:
                print(f'i={i}, loss={loss}')
                # print(output, labels)
            loss.backward()
            optimizer.step()
            if scheduler is not None:
                scheduler.step()

    lr = 1e-5
    MAX_LEN = 100
    EPOCHS = 10
    batch_size = 32
    train_ds = DataTokenizer(train_data, MODEL_PATH, MAX_LEN)
    train_dl = torch.utils.data.DataLoader(
        train_ds,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        drop_last=True,
    )
    device = torch.device('cuda')
    model = mx.to(device)
    num_train_steps = int(len(train_ds) / batch_size / EPOCHS)
    optimizer = AdamW(model.parameters(), lr=lr)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=num_train_steps
    )
    print('train_start')
    for i in range(EPOCHS):
        train_loop(train_dl, model, optimizer, loss_fn, device, scheduler=None)
        torch.save(model.state_dict(), 'model.bin')
    return losses
I ran this model for 10 epochs with learning rates of 1e-3, 1e-4, and 1e-5,
but the loss stays around 1.05 for all 10 epochs, and the output from the model is always around [0.33, 0.33, 0.33], i.e. it looks like random guessing.
I would like to know why my model is not training.
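For reference, a loss of about 1.05 really does correspond to random guessing here: the cross-entropy of a uniform prediction over 3 classes is ln(3):

import math
print(math.log(3))   # 1.0986..., roughly the ~1.05 loss I keep seeing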