Using BERT to encode descriptions instead of LSTM/CNN

Hi everyone,
I have a question about BERT.
I want to use BERT to encode the short description and the long description instead of an LSTM and a CNN. On the web I only found BertForSequenceClassification, but that model needs labels, and my dataset has no labels, only data pairs such as "A and B are similar". My idea is to just use the BERT model, fine-tune it, and keep the same loss function. Here is the structure of the model, here is the loss function, and here is my code!

I am not sure whether what I read is right or not, but the results have not improved.
Can anyone help me?
Thanks a lot!

Sentence-pair similarity can be formulated as a binary classification problem, so using a BinaryCrossEntropy loss is a good idea.
Can you share the full code (not screenshots) here for better understanding?
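
For example, here is a minimal sketch of that formulation: score each pair by the cosine similarity of the two embeddings and train it against a 0/1 duplicate target with BCE. The names `encode`, `bug_a`, `bug_b`, and `target` are placeholders, not from your code, and binary classification also needs negative pairs, which you would have to sample.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

bce = nn.BCEWithLogitsLoss()

def pair_loss(encode, bug_a, bug_b, target):
    # encode(...) is any encoder that returns a fixed-size vector per bug.
    emb_a = encode(bug_a)                              # (batch, dim)
    emb_b = encode(bug_b)                              # (batch, dim)
    # Use the cosine similarity as the logit of "this pair is a duplicate".
    logit = F.cosine_similarity(emb_a, emb_b, dim=-1)  # (batch,), in [-1, 1]
    # target: float tensor with 1 for similar pairs, 0 for sampled negatives.
    return bce(logit, target.float())
```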

Hi, thanks for your kind reply!

```python
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import BertModel

from data_generator import *

torch.backends.cudnn.benchmark = False


class Residual(nn.Module):
    def __init__(self, d, fn):
        super(Residual, self).__init__()
        self.fn = fn
        self.projection = nn.Sequential(nn.Linear(d, d), fn, nn.Linear(d, d))

    def forward(self, x):
        return self.fn(x + self.projection(x))


class Net(nn.Module):
    def __init__(self, args):
        super(Net, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        # Freeze BERT; only the projection layers below are trained.
        for param in self.bert.parameters():
            param.requires_grad = False

        self.info_proj = nn.Sequential(nn.Linear(args.n_prop, 100), nn.Tanh())
        self.residual = Residual(300, nn.Tanh())
        self.projection = nn.Linear(300, 100)
        self.bert_proj = nn.Sequential(nn.Linear(768, 100), nn.Tanh())

    def forward_cnn(self, x):
        # BERT encoder replacing the original CNN over the long description.
        context, mask = x
        output = self.bert(context, attention_mask=mask)
        return output.last_hidden_state

    def forward_rnn(self, x):
        # BERT encoder replacing the original LSTM over the short description.
        context, mask = x
        output = self.bert(context, attention_mask=mask)
        return output.last_hidden_state

    def forward(self, x):
        info = x['info']
        info_feature = self.info_proj(info.float())

        # Mean-pool the token states of each description, then project to 100-d.
        bert_long = self.forward_cnn(x['desc'])
        bert_long = torch.mean(bert_long, dim=1)
        bert_long = self.bert_proj(bert_long)

        bert_short = self.forward_rnn(x['short_desc'])
        bert_short = torch.mean(bert_short, dim=1)
        bert_short = self.bert_proj(bert_short)

        # Concatenate the metadata and both text embeddings into one 300-d feature.
        feature = torch.cat([info_feature, bert_long, bert_short], -1)
        # feature_res = self.residual(feature)
        return self.projection(feature)
```
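
For reference, a forward pass with dummy inputs looks like this; the batch shapes and `n_prop=42` are placeholders, not my real data:

```python
import argparse

# Placeholder args: n_prop must match the one-hot info vector length.
args = argparse.Namespace(n_prop=42)
net = Net(args)

dummy = {
    'info': torch.zeros(2, args.n_prop),
    'desc': (torch.zeros(2, 500, dtype=torch.long),   # token ids (all [PAD])
             torch.zeros(2, 500, dtype=torch.long)),  # attention mask
    'short_desc': (torch.zeros(2, 30, dtype=torch.long),
                   torch.zeros(2, 30, dtype=torch.long)),
}
out = net(dummy)   # -> (2, 100) embedding per bug
```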

The next part generates the token IDs and attention masks:

```python
import os
import pickle

import numpy as np
import torch
from keras.preprocessing.sequence import pad_sequences

# This snippet is the body of the batch-building function, hence the trailing
# return. tokenizer (a BertTokenizer), desc_word, short_desc_word, to_one_hot,
# info_dict and dtype are assumed to be defined earlier in the file.

info = []
for bug_id in batch_bugs:
    bug = pickle.load(open(os.path.join(
        '/content/drive/My Drive/DuplicateBugFinder/openOffice/bugs',
        '{}.pkl'.format(bug_id)), 'rb'))
    # One-hot encode the categorical bug fields and collect one row per bug.
    info_ = np.concatenate((
        to_one_hot(bug['bug_severity'], info_dict['bug_severity']),
        to_one_hot(bug['bug_status'], info_dict['bug_status']),
        to_one_hot(bug['component'], info_dict['component']),
        to_one_hot(bug['priority'], info_dict['priority']),
        to_one_hot(bug['product'], info_dict['product']),
        to_one_hot(bug['version'], info_dict['version'])))
    info.append(info_)

# Tokenize the long descriptions, pad/truncate to 500 tokens, and build
# attention masks (1 for real tokens, 0 for padding).
encoded_long = []
for input_long in desc_word:
    encoded_long.append(tokenizer.encode(input_long, add_special_tokens=True))
input_ids_long = pad_sequences(encoded_long, maxlen=500, dtype="long",
                               value=0, truncating="post", padding="post")
attention_masks_long = []
for sent in input_ids_long:
    attention_masks_long.append([int(token_id > 0) for token_id in sent])
attention_masks_long = torch.tensor(attention_masks_long).cuda()
encoded_long = torch.tensor(input_ids_long).cuda()

# Same for the short descriptions, padded to 30 tokens.
encoded_short = []
for input_short in short_desc_word:
    encoded_short.append(tokenizer.encode(input_short, add_special_tokens=True))
input_ids_short = pad_sequences(encoded_short, maxlen=30, dtype="long",
                                value=0, truncating="post", padding="post")
attention_masks_short = []
for sent in input_ids_short:
    attention_masks_short.append([int(token_id > 0) for token_id in sent])
attention_masks_short = torch.tensor(attention_masks_short).cuda()
encoded_short = torch.tensor(input_ids_short).cuda()

info = torch.from_numpy(np.array(info)).type(dtype).cuda()
batch_bugs = dict()
batch_bugs['info'] = info
batch_bugs['desc'] = (encoded_long, attention_masks_long)
batch_bugs['short_desc'] = (encoded_short, attention_masks_short)

return batch_bugs
```
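
For completeness, here is a rough sketch of how these pieces could be wired into one training step using the binary cross-entropy formulation suggested above; `batch_a`, `batch_b`, and `labels` are assumed to come from the batching code plus a pair sampler, so treat this as a sketch rather than the exact loop:

```python
# Only the projection layers get gradients because BERT is frozen in Net.
params = [p for p in net.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

emb_a = net(batch_a)     # batch_a / batch_b: dicts built like batch_bugs above
emb_b = net(batch_b)
logits = F.cosine_similarity(emb_a, emb_b, dim=-1)
loss = criterion(logits, labels.float())   # labels: 1 = duplicate, 0 = not

optimizer.zero_grad()
loss.backward()
optimizer.step()
```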