Concatenate the outputs of BERT and a Transformer

How can I feed two PyTorch models with different data, then concatenate the outputs of the two models before prediction?

You could basically write the model in the same way you’ve described it:

out1 = model1(in1)
out2 = model2(in2)
out = torch.cat((out1, out2), dim=1)

pred = classifier(out)
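
For concreteness, here is a minimal self-contained sketch of that pattern with toy linear models standing in for the two networks (all shapes here are made up):

import torch
import torch.nn as nn

model1 = nn.Linear(10, 8)         # stand-in for the first feature extractor
model2 = nn.Linear(20, 8)         # stand-in for the second feature extractor
classifier = nn.Linear(8 + 8, 3)  # takes the concatenated features

in1 = torch.randn(4, 10)  # batch of 4 samples for model1
in2 = torch.randn(4, 20)  # batch of 4 samples for model2

out1 = model1(in1)                    # [4, 8]
out2 = model2(in2)                    # [4, 8]
out = torch.cat((out1, out2), dim=1)  # [4, 16], concatenated along the feature dim
pred = classifier(out)                # [4, 3]

Note that torch.cat with dim=1 requires both outputs to have the same batch size in dim 0.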

Thanks for your response. I'm still confused. This is what I did, but I'm stuck on point number 3:
1- I created two models (BERTClassA, BERTClassB) and removed the classifier layers from them.
2- Then I concatenated the outputs of the two models in the train function.
3- Then how can I loop through each data loader, pass each batch to its model, take the outputs back from the two models, concatenate them, and pass the result to the ensemble class?

Data

training_loader1 = DataLoader(training_set1, **train_params)
training_loader2 = DataLoader(training_set2, **train_params)
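
For reference, train_params here would be the usual DataLoader keyword arguments, e.g. (values hypothetical):

train_params = {
    'batch_size': TRAIN_BATCH_SIZE,  # assumed to be defined elsewhere
    'shuffle': True,
    'num_workers': 0,
}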

Model A

class BERTClassA(torch.nn.Module):
    def __init__(self):
        super(BERTClassA, self).__init__()
        self.l1 = BertModel.from_pretrained("bert-base-uncased")
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_1[0]
        pooler = hidden_state[:, 0]  # representation of the [CLS] token
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        pooler = self.dropout(pooler)
        return pooler

Model B

class BERTClassB(torch.nn.Module):
    def __init__(self):
        super(BERTClassB, self).__init__()
        self.l1 = BertModel.from_pretrained("bert-base-uncased")
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_1[0]
        pooler = hidden_state[:, 0]  # representation of the [CLS] token
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        pooler = self.dropout(pooler)
        return pooler
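
As a quick sanity check (assuming the Hugging Face transformers library is installed), each of these models should return a [batch_size, 768] feature tensor:

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
enc = tokenizer(["a quick test"], return_tensors="pt")

model_a = BERTClassA()  # hypothetical instance name
feats = model_a(enc["input_ids"], enc["attention_mask"])
print(feats.shape)  # torch.Size([1, 768])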

Train function

def train(epoch):
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model.train()

    for _, data1 in enumerate(training_loader1, 0):
        for _, data2 in enumerate(training_loader2, 0):
            ids = data1['ids'].to(device, dtype=torch.long)
            mask = data1['mask'].to(device, dtype=torch.long)
            targets1 = data1['targets'].to(device, dtype=torch.long)

            ids2 = data2['ids'].to(device, dtype=torch.long)
            mask2 = data2['mask'].to(device, dtype=torch.long)
            targets2 = data2['targets'].to(device, dtype=torch.long)

            outputs1 = modelBERT(ids, mask)
            outputs2 = modelBERT2(ids2, mask2)

            outputs = torch.cat((outputs1, outputs2), dim=1)

            ensamble_output = model(outputs)

            loss = loss_function(ensamble_output, targets1)
            tr_loss += loss.item()
            big_val, big_idx = torch.max(ensamble_output.data, dim=1)
            n_correct += calcuate_accu(big_idx, targets1)

            nb_tr_steps += 1
            nb_tr_examples += targets1.size(0)

            if _ % 100 == 0:
                loss_step = tr_loss / nb_tr_steps
                accu_step = (n_correct * 100) / nb_tr_examples
                print(f"Training Loss per 100 steps: {loss_step}")
                print(f"Training Accuracy per 100 steps: {accu_step}")

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    print(f'The Total Accuracy for Epoch {epoch}: {(n_correct * 100) / nb_tr_examples}')
    epoch_loss = tr_loss / nb_tr_steps
    epoch_accu = (n_correct * 100) / nb_tr_examples
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_accu}")

    return

for epoch in range(EPOCHS):
    train(epoch)

Ensemble class

import torch.nn.functional as F

class MyEnsemble(torch.nn.Module):
    def __init__(self, modelA, modelB):
        super(MyEnsemble, self).__init__()
        self.modelA = modelA
        self.modelB = modelB
        # each sub-model returns 768 features, so the concatenated
        # input to the classifier is 768 * 2 = 1536 wide
        self.classifier = torch.nn.Linear(768 * 2, 3)

    def forward(self, x1, mask1, x2, mask2):
        x1 = self.modelA(x1, mask1)
        x2 = self.modelB(x2, mask2)
        x = torch.cat((x1, x2), dim=1)
        x = self.classifier(F.relu(x))
        return x

modelA = BERTClassA()
modelB = BERTClassB()

model = MyEnsemble(modelA, modelB)
model.to(device)
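
A hypothetical smoke test before training (random token ids; 30522 is the vocabulary size of bert-base-uncased): the ensemble should map the two input pairs to [batch_size, 3] logits.

ids = torch.randint(0, 30522, (2, 16), device=device)
mask = torch.ones(2, 16, dtype=torch.long, device=device)
out = model(ids, mask, ids, mask)
print(out.shape)  # expected: torch.Size([2, 3])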

I assume you would like to pass batches from training_loader1 as x1 and batches from training_loader2 as x2 to the model. If so, you could either iterate both loaders via zip or load the batches manually via:

iter_loader1 = iter(training_loader1)
batch1 = next(iter_loader1)
...
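
With zip, the two loaders are iterated in lockstep (zip stops at the shorter loader). A sketch using your batch keys:

for step, (batch1, batch2) in enumerate(zip(training_loader1, training_loader2)):
    ids1 = batch1['ids'].to(device, dtype=torch.long)
    mask1 = batch1['mask'].to(device, dtype=torch.long)
    targets1 = batch1['targets'].to(device, dtype=torch.long)

    ids2 = batch2['ids'].to(device, dtype=torch.long)
    mask2 = batch2['mask'].to(device, dtype=torch.long)

    outputs = model(ids1, mask1, ids2, mask2)
    loss = loss_function(outputs, targets1)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()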

Thanks a lot Ptrblck, your answer worked for me.
Please help me with this error:
TypeError: not all arguments converted during string formatting


Training function

def train(epoch):
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model.train()

    # create the iterators once; recreating them inside the loop
    # would return the first batch on every iteration
    iter_loader1 = iter(training_loader1)
    iter_loader2 = iter(training_loader2)

    for i in range(TRAIN_BATCH_SIZE):
        batch1 = next(iter_loader1)
        batch2 = next(iter_loader2)

        ids1 = batch1['ids'].to(device, dtype=torch.long)
        mask1 = batch1['mask'].to(device, dtype=torch.long)
        targets1 = batch1['targets'].to(device, dtype=torch.long)

        ids2 = batch2['ids'].to(device, dtype=torch.long)
        mask2 = batch2['mask'].to(device, dtype=torch.long)
        targets2 = batch2['targets'].to(device, dtype=torch.long)

        outputs = model(ids1, mask1, ids2, mask2)

        print(outputs)
        loss = loss_function(outputs, targets2)
        print(loss)
        tr_loss += loss.item()
        print(tr_loss)
        big_val, big_idx = torch.max(outputs.data, dim=1)
        print(big_val, big_idx)
        n_correct += calcuate_accu(big_idx, targets2)

        nb_tr_steps += 1
        print(nb_tr_steps)
        nb_tr_examples += targets2.size(0)
        print(nb_tr_examples)
        #print (_)

        if _%100==0:
            loss_step = tr_loss / nb_tr_steps
            accu_step = (n_correct * 100) / nb_tr_examples
            print(f"Training Loss per 100 steps: {loss_step}")
            print(f"Training Accuracy per 100 steps: {accu_step}")

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'The Total Accuracy for Epoch {epoch}: {(n_correct * 100) / nb_tr_examples}')
    epoch_loss = tr_loss / nb_tr_steps
    epoch_accu = (n_correct * 100) / nb_tr_examples
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_accu}")

    return

The new error is raised by pandas and I don’t know which line of code is causing the issue.
My guess would be that pandas is used for some preprocessing, which might fail?


This line caused the error:
if _%100==0:

When I print the value of _ using one model without ensembling, it is just an integer, but after my update with ensembling, _ is a DataFrame. I don't know how to fix it.
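
For reference, this is the error Python raises whenever the % operator is applied to a string, because % then means string formatting; a DataFrame holding strings can trigger the same thing elementwise:

"some text" % 100  # raises TypeError: not all arguments converted during string formatting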

The _ might refer to the last return value in this case, wouldn’t it?
Change the _ to an explicit variable which tracks the iterations and it should work.
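
A sketch of that fix: keep the iterators outside the loop and use an explicit integer counter, so the modulo is applied to an int:

iter_loader1 = iter(training_loader1)
iter_loader2 = iter(training_loader2)

for step in range(len(training_loader1)):  # len(loader) = number of batches
    batch1 = next(iter_loader1)
    batch2 = next(iter_loader2)
    # ... forward pass, loss, backward as before ...

    if step % 100 == 0:  # step is an int, so the modulo works
        print(f"step {step}")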


Thanks a lot Ptrblck, your answers are very helpful.