I have constructed this model from scratch and it doesn’t learn. the accuracy score is always 0.5 or 0.4375. And val is stuck too. Any help will be appreciated.
This is My Dataset Loader:
class Dataset(torch.utils.data.Dataset):
def init(self,df,max_len=96):self.df = df self.max_len = max_len self.labeled = (df.shape[1] == 2) self.tokenizer = transformers.DistilBertTokenizer.from_pretrained('distilbert-base-uncased') def __getitem__(self,index): data = {} row = self.df.iloc[index] ids,masks,labels = self.get_input_data(row) data['ids'] = ids data['masks'] = masks if self.labeled: data['labels'] = torch.tensor(labels,dtype=torch.float64) return data def __len__(self): return len(self.df) def get_input_data(self,row): ids = self.tokenizer.encode(row[0],add_special_tokens=True) pad_len = self.max_len - len(ids) if pad_len > 0 : ids += [0]*pad_len ids = torch.tensor(ids) masks = torch.where(ids != 1 , torch.tensor(1),torch.tensor(0)) return ids,masks,row[1]
This my Model
class Model(nn.Module):
def init(self):
super(Model,self).init()
self.distilBert = transformers.DistilBertModel.from_pretrained(‘distilbert-base-uncased’)
self.fc0 = nn.Linear(768,256)
self.fc1 = nn.Linear(256,1)
self.lr = nn.LeakyReLU()
nn.init.normal_(self.fc0.weight,std= 0.2)
nn.init.normal_(self.fc0.bias ,0.1)
nn.init.normal_(self.fc1.weight,std =0.2)
nn.init.normal_(self.fc1.bias, 0.1)def forward(self,input_ids,attention_mask): src = self.distilBert(input_ids,attention_mask) src = src[0][:,0,:] x = self.fc0(src) x = self.lr(x) x = self.fc1(x) return x
And this is my training loop :
criterion = nn.BCELoss()
model = Model().to(‘cuda’)
optimizer = torch.optim.Adam(model.parameters(),lr=0.01)for epoch in range(epochs):
epoch_loss = 0 val_loss = 0 model.train() for data in tqdm(train_loader): ids = data['ids'].cuda() masks = data['masks'].cuda() labels = data['labels'].cuda() optimizer.zero_grad() outputs = model(ids,masks) outputs = F.sigmoid(outputs) loss = criterion(outputs.double(),labels) loss.backward() optimizer.step() epoch_loss += loss.item() outputs = outputs.cpu().detach().numpy() labels = labels.cpu().detach().numpy() outputs = np.argmax(outputs,axis=1) print(f'Train Epoch {epoch} : Loss {epoch_loss/len(train_loader)}') print("Train Accuracy : ",accuracy_score(outputs,labels)) model.eval() for data in val_loader: ids = data['ids'].cuda() masks = data['masks'].cuda() labels = data['labels'].cuda() outputs = model(ids,masks) outputs = F.sigmoid(outputs) loss = criterion(outputs.double(),labels) val_loss += loss.item() outputs = outputs.cpu().detach().numpy() labels = labels.cpu().detach().numpy() outputs = np.argmax(outputs,axis=1) print(f'Val Epoch {epoch} : Loss {val_loss/len(val_loader)}') print("Val Accuracy : ",accuracy_score(outputs,labels))