Concatenate the outputs of BERT and a Transformer

How can I feed two PyTorch models with different data, then concatenate the outputs of the two models before prediction?

You could basically write the model in the same way you’ve described it:

out1 = model1(in1)
out2 = model2(in2)
out = torch.cat((out1, out2), dim=1)

pred = classifier(out)
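
For concreteness, here is a minimal self-contained sketch of that pattern with toy linear models standing in for the two networks (all shapes here are made up):

import torch
import torch.nn as nn

model1 = nn.Linear(10, 8)         # stand-in for the first feature extractor
model2 = nn.Linear(20, 8)         # stand-in for the second feature extractor
classifier = nn.Linear(8 + 8, 3)  # takes the concatenated features

in1 = torch.randn(4, 10)  # batch of 4 samples for model1
in2 = torch.randn(4, 20)  # batch of 4 samples for model2

out1 = model1(in1)                    # [4, 8]
out2 = model2(in2)                    # [4, 8]
out = torch.cat((out1, out2), dim=1)  # [4, 16], concatenated along the feature dim
pred = classifier(out)                # [4, 3]

Note that torch.cat with dim=1 requires both outputs to have the same batch size in dim 0.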

Thanks for your response. I'm still confused. This is what I did, but I'm stuck on point number 3:
1- I created two models (BERTClassA, BERTClassB) and removed the classifier layers from them.
2- Then I concatenated the outputs of the two models in the train function.
3- Then how can I loop through each data loader, pass each batch to its model, take the outputs back from the two models, concatenate them, and pass the result to the ensemble class?

Data

training_loader1 = DataLoader(training_set1, **train_params)
training_loader2 = DataLoader(training_set2, **train_params)
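
For reference, train_params here would be the usual DataLoader keyword arguments, e.g. (values hypothetical):

train_params = {
    'batch_size': TRAIN_BATCH_SIZE,  # assumed to be defined elsewhere
    'shuffle': True,
    'num_workers': 0,
}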

Model A

class BERTClassA(torch.nn.Module):
    def __init__(self):
        super(BERTClassA, self).__init__()
        self.l1 = BertModel.from_pretrained("bert-base-uncased")
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_1[0]
        pooler = hidden_state[:, 0]  # representation of the [CLS] token
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        pooler = self.dropout(pooler)
        return pooler

Model B

class BERTClassB(torch.nn.Module):
    def __init__(self):
        super(BERTClassB, self).__init__()
        self.l1 = BertModel.from_pretrained("bert-base-uncased")
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_1[0]
        pooler = hidden_state[:, 0]  # representation of the [CLS] token
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        pooler = self.dropout(pooler)
        return pooler
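
As a quick sanity check (assuming the Hugging Face transformers library is installed), each of these models should return a [batch_size, 768] feature tensor:

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
enc = tokenizer(["a quick test"], return_tensors="pt")

model_a = BERTClassA()  # hypothetical instance name
feats = model_a(enc["input_ids"], enc["attention_mask"])
print(feats.shape)  # torch.Size([1, 768])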

Train function

def train(epoch):
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model.train()

    for _, data1 in enumerate(training_loader1, 0):
        for _, data2 in enumerate(training_loader2, 0):
            ids = data1['ids'].to(device, dtype=torch.long)
            mask = data1['mask'].to(device, dtype=torch.long)
            targets1 = data1['targets'].to(device, dtype=torch.long)

            ids2 = data2['ids'].to(device, dtype=torch.long)
            mask2 = data2['mask'].to(device, dtype=torch.long)
            targets2 = data2['targets'].to(device, dtype=torch.long)

            outputs1 = modelBERT(ids, mask)
            outputs2 = modelBERT2(ids2, mask2)

            outputs = torch.cat((outputs1, outputs2), dim=1)

            ensamble_output = model(outputs)

            loss = loss_function(ensamble_output, targets1)
            tr_loss += loss.item()
            big_val, big_idx = torch.max(ensamble_output.data, dim=1)
            n_correct += calcuate_accu(big_idx, targets1)

            nb_tr_steps += 1
            nb_tr_examples += targets1.size(0)

            if _ % 100 == 0:
                loss_step = tr_loss / nb_tr_steps
                accu_step = (n_correct * 100) / nb_tr_examples
                print(f"Training Loss per 100 steps: {loss_step}")
                print(f"Training Accuracy per 100 steps: {accu_step}")

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    print(f'The Total Accuracy for Epoch {epoch}: {(n_correct * 100) / nb_tr_examples}')
    epoch_loss = tr_loss / nb_tr_steps
    epoch_accu = (n_correct * 100) / nb_tr_examples
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_accu}")

    return

for epoch in range(EPOCHS):
    train(epoch)

Ensemble class

import torch.nn.functional as F

class MyEnsemble(torch.nn.Module):
    def __init__(self, modelA, modelB):
        super(MyEnsemble, self).__init__()
        self.modelA = modelA
        self.modelB = modelB
        # each sub-model returns 768 features, so the concatenated
        # input to the classifier is 768 * 2 = 1536 wide
        self.classifier = torch.nn.Linear(768 * 2, 3)

    def forward(self, x1, mask1, x2, mask2):
        x1 = self.modelA(x1, mask1)
        x2 = self.modelB(x2, mask2)
        x = torch.cat((x1, x2), dim=1)
        x = self.classifier(F.relu(x))
        return x

modelA = BERTClassA()
modelB = BERTClassB()

model = MyEnsemble(modelA, modelB)
model.to(device)
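
A hypothetical smoke test before training (random token ids; 30522 is the vocabulary size of bert-base-uncased): the ensemble should map the two input pairs to [batch_size, 3] logits.

ids = torch.randint(0, 30522, (2, 16), device=device)
mask = torch.ones(2, 16, dtype=torch.long, device=device)
out = model(ids, mask, ids, mask)
print(out.shape)  # expected: torch.Size([2, 3])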

I assume you would like to pass batches from training_loader1 as x1 and batches from training_loader2 as x2 to the model. If so, you could either iterate both loaders via zip or load the batches manually via:

iter_loader1 = iter(training_loader1)
batch1 = next(iter_loader1)
...
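
With zip, the two loaders are iterated in lockstep (zip stops at the shorter loader). A sketch using your batch keys:

for step, (batch1, batch2) in enumerate(zip(training_loader1, training_loader2)):
    ids1 = batch1['ids'].to(device, dtype=torch.long)
    mask1 = batch1['mask'].to(device, dtype=torch.long)
    targets1 = batch1['targets'].to(device, dtype=torch.long)

    ids2 = batch2['ids'].to(device, dtype=torch.long)
    mask2 = batch2['mask'].to(device, dtype=torch.long)

    outputs = model(ids1, mask1, ids2, mask2)
    loss = loss_function(outputs, targets1)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()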

Thanks a lot Ptrblck, your answer worked for me.
Please help me with this error:
TypeError: not all arguments converted during string formatting


Training function

def train(epoch):
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model.train()

    # create the iterators once; recreating them inside the loop
    # would return the first batch on every iteration
    iter_loader1 = iter(training_loader1)
    iter_loader2 = iter(training_loader2)

    for i in range(TRAIN_BATCH_SIZE):
        batch1 = next(iter_loader1)
        batch2 = next(iter_loader2)

        ids1 = batch1['ids'].to(device, dtype=torch.long)
        mask1 = batch1['mask'].to(device, dtype=torch.long)
        targets1 = batch1['targets'].to(device, dtype=torch.long)

        ids2 = batch2['ids'].to(device, dtype=torch.long)
        mask2 = batch2['mask'].to(device, dtype=torch.long)
        targets2 = batch2['targets'].to(device, dtype=torch.long)

        outputs = model(ids1, mask1, ids2, mask2)

        print(outputs)
        loss = loss_function(outputs, targets2)
        print(loss)
        tr_loss += loss.item()
        print(tr_loss)
        big_val, big_idx = torch.max(outputs.data, dim=1)
        print(big_val, big_idx)
        n_correct += calcuate_accu(big_idx, targets2)

        nb_tr_steps += 1
        print(nb_tr_steps)
        nb_tr_examples += targets2.size(0)
        print(nb_tr_examples)
        #print (_)

        if _%100==0:
            loss_step = tr_loss / nb_tr_steps
            accu_step = (n_correct * 100) / nb_tr_examples
            print(f"Training Loss per 100 steps: {loss_step}")
            print(f"Training Accuracy per 100 steps: {accu_step}")

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'The Total Accuracy for Epoch {epoch}: {(n_correct * 100) / nb_tr_examples}')
    epoch_loss = tr_loss / nb_tr_steps
    epoch_accu = (n_correct * 100) / nb_tr_examples
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_accu}")

    return

The new error is raised by pandas and I don’t know which line of code is causing the issue.
My guess would be that pandas is used for some preprocessing, which might fail?


This line caused the error:
if _%100==0:

When I print the value of _ using one model without ensembling, it is just an integer, but after my update with ensembling, _ is a DataFrame. I don't know how to fix it.
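
For reference, this is the error Python raises whenever the % operator is applied to a string, because % then means string formatting; a DataFrame holding strings can trigger the same thing elementwise:

"some text" % 100  # raises TypeError: not all arguments converted during string formatting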

The _ might refer to the last return value in this case, wouldn’t it?
Change the _ to an explicit variable which tracks the iterations and it should work.
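
A sketch of that fix: keep the iterators outside the loop and use an explicit integer counter, so the modulo is applied to an int:

iter_loader1 = iter(training_loader1)
iter_loader2 = iter(training_loader2)

for step in range(len(training_loader1)):  # len(loader) = number of batches
    batch1 = next(iter_loader1)
    batch2 = next(iter_loader2)
    # ... forward pass, loss, backward as before ...

    if step % 100 == 0:  # step is an int, so the modulo works
        print(f"step {step}")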


Thanks a lot Ptrblck, your answers are very helpful.