Hey!
I'm trying to fine-tune a pre-trained BERT model with two classification outputs, one for the category ("products") and one for the subcategory ("actions").
The problem is that the model is not learning: I get exactly the same loss and metrics every epoch.
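To make the goal concrete, one forward pass should give two predictions per text, roughly like this (an illustrative sketch: the input string is made up, and classifier is an instance of the model class shown further down):

# Sketch of the intended usage: one encoded text in, two score vectors out.
# 'replace my credit card' is only a made-up example input.
enc = classifier.tokenizer('replace my credit card', return_tensors='pt')
products_scores, actions_scores = classifier(input_ids=enc['input_ids'].to(classifier.device),
                                             attention_mask=enc['attention_mask'].to(classifier.device))
predicted_product = classifier.model.config.id2label['products'][np.argmax(products_scores[0])]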
The training loop:
for epoch in range(1, epochs + 1):
    if epoch > 1:
        train_dataloader = classifier.create_dynamic_padding(db=train_data,
                                                             batch_size=batch_size,
                                                             product_targets='products',
                                                             action_targets='actions',
                                                             random_index=17)
    classifier.train()
    loss_train_total = 0
    status_bar = trange(0, len(train_dataloader['py_inputs']), leave=True, position=0,
                        desc=f'Epoch {epoch} / {epochs}')
    for batch in status_bar:
        status_bar.set_postfix({'Average loss': loss_train_total / (batch + 1)})
        classifier.model.zero_grad()
        b_input_ids = train_dataloader['py_inputs'][batch].to(classifier.device)
        b_input_mask = train_dataloader['py_attn_masks'][batch].to(classifier.device)
        b_products = train_dataloader['py_products'][batch].to(classifier.device)
        b_actions = train_dataloader['py_actions'][batch].to(classifier.device)
        outputs = classifier(input_ids=b_input_ids,
                             attention_mask=b_input_mask,
                             products_targets=b_products,
                             actions_targets=b_actions)
        products_loss = criterion(torch.tensor(outputs[0], requires_grad=True), b_products)
        actions_loss = criterion(torch.tensor(outputs[1], requires_grad=True), b_actions)
        loss = 0.5 * products_loss + 0.5 * actions_loss
        loss_train_total += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(classifier.model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
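Since the symptom is a completely flat loss, a quick sanity check is to look at the encoder's gradients right after loss.backward() (a minimal sketch using the classifier name from the loop above):

# Minimal gradient-flow sanity check (sketch): run once right after
# loss.backward(). If grads are None or all-zero, the loss is
# disconnected from the encoder and optimizer.step() changes nothing.
for name, param in list(classifier.model.named_parameters())[:5]:
    grad_ok = param.grad is not None and param.grad.abs().sum().item() > 0
    print(f'{name}: {"grad flows" if grad_ok else "NO GRADIENT"}')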
The model class:
class TwoLevelsClassificator(nn.Module):
    """
    A two-level classifier: predicts a product (category) and an action (subcategory) for each input.
    """

    def __init__(self, products, actions, checkpoints):
        super(TwoLevelsClassificator, self).__init__()
        self.num_of_products = len(products)
        self.num_of_actions = len(actions)
        self.tokenizer = AutoTokenizer.from_pretrained(checkpoints)
        self.model = AutoModel.from_pretrained(
            checkpoints,
            config=AutoConfig.from_pretrained(checkpoints,
                                              output_attentions=True,
                                              output_hidden_states=True))
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)
        self.model.config.label2id = {
            'products': {product: product_id for product_id, product in enumerate(products)},
            'actions': {action: action_id for action_id, action in enumerate(actions)}}
        self.model.config.id2label = {
            'products': {product_id: product for product, product_id in
                         self.model.config.label2id['products'].items()},
            'actions': {action_id: action for action, action_id in
                        self.model.config.label2id['actions'].items()}}
        self.products_out_layer = nn.Linear(self.model.config.hidden_size, self.num_of_products)
        self.actions_out_layer = nn.Linear(self.model.config.hidden_size, self.num_of_actions)
        self.embedding = nn.Embedding(self.model.config.vocab_size,
                                      self.model.config.hidden_size,
                                      padding_idx=self.model.config.pad_token_id)
        self.out_softmax = nn.Softmax(dim=1)

    def forward(self, input_ids=None, attention_mask=None, products_targets=None, actions_targets=None):
        bert_out = self.model(input_ids=input_ids, attention_mask=attention_mask)
        bert_last_hidden_state = bert_out[0]
        products_output_layer = self.products_out_layer(
            bert_last_hidden_state[:, 0, :].view(-1, 768)).detach().numpy()
        products_labels = [f'{self.model.config.id2label["products"][np.argmax(preds_vector)].replace("_", " ")}'
                           for preds_vector in products_output_layer]
        tokenizer_labels = self.tokenizer(products_labels)
        if False in [len(i) == len(tokenizer_labels['input_ids']) for i in tokenizer_labels['input_ids']]:
            tokenizer_labels = self.padd_to_max_size(tokenized_seq=tokenizer_labels)
        embedded_products_layer = self.embedding(torch.tensor(tokenizer_labels['input_ids']))
        actions_output_layer = self.actions_out_layer(
            torch.cat((bert_last_hidden_state,
                       embedded_products_layer.expand(bert_last_hidden_state.shape[0], -1, -1)),
                      dim=1)[:, 0, :].view(-1, 768)).detach().numpy()
        return products_output_layer, actions_output_layer
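For comparison, a plain two-head classifier over the [CLS] embedding can be written with everything kept as tensors end to end (a minimal self-contained sketch with illustrative names; it deliberately leaves out the products-embedding step from my forward above):

import torch.nn as nn
from transformers import AutoModel

class TwoHeadSketch(nn.Module):
    """Illustrative only: two independent linear heads over the [CLS] embedding."""

    def __init__(self, checkpoints, num_products, num_actions):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(checkpoints)
        hidden = self.encoder.config.hidden_size
        self.products_head = nn.Linear(hidden, num_products)
        self.actions_head = nn.Linear(hidden, num_actions)

    def forward(self, input_ids, attention_mask):
        out = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        cls = out.last_hidden_state[:, 0, :]  # [batch_size, hidden_size]
        # Raw logits are returned as tensors; CrossEntropyLoss applies
        # log-softmax internally, so no Softmax layer is needed here.
        return self.products_head(cls), self.actions_head(cls)

The two logit tensors could then be fed straight into criterion and combined with the same 0.5/0.5 weighting as in the training loop.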