Concatenating the output of BERT with the output of a transformer

I want to feed two models with different features. How can I concatenate the output of BERT's dense layer with the output of the transformer, pass the concatenated output to another fully connected layer, and then to the softmax layer?

In other words, how do I train an ensemble of PyTorch models using two different data loaders?

def train(epoch):
    """Run one training epoch over `training_loader3`.

    Prints running loss/accuracy every 100 steps and the epoch totals
    at the end.

    Args:
        epoch: Epoch index, used only in the summary printout.

    Relies on module-level names defined elsewhere in the file:
    `model`, `loss_function`, `optimizer`, `training_loader3`,
    `device`, and `calcuate_accu`.
    """
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    for step, data in enumerate(training_loader3, 0):
        # FIX: the original used typographic quotes (‘ids’ etc.), which
        # are a syntax error in Python — replaced with straight quotes.
        ids = data['ids'].to(device, dtype=torch.long)
        mask = data['mask'].to(device, dtype=torch.long)
        targets = data['targets'].to(device, dtype=torch.long)

        outputs = model(ids, mask)
        loss = loss_function(outputs, targets)
        tr_loss += loss.item()
        # FIX: original was `torch.max(, dim=1)` — the tensor argument
        # was missing; argmax over the class-logit dimension.
        big_val, big_idx = torch.max(outputs.data, dim=1)
        n_correct += calcuate_accu(big_idx, targets)

        nb_tr_steps += 1
        # FIX: nb_tr_examples was never incremented, so every accuracy
        # division below raised ZeroDivisionError.
        nb_tr_examples += targets.size(0)

        if step % 100 == 0:
            loss_step = tr_loss / nb_tr_steps
            accu_step = (n_correct * 100) / nb_tr_examples
            print(f"Training Loss per 100 steps: {loss_step}")
            print(f"Training Accuracy per 100 steps: {accu_step}")

        # FIX: the snippet had no backward pass, so the model never
        # learned. NOTE(review): assumes a module-level `optimizer`
        # exists, as in the standard fine-tuning recipe — TODO confirm
        # against the rest of the file.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'The Total Accuracy for Epoch {epoch}: {(n_correct*100)/nb_tr_examples}')
    epoch_loss = tr_loss / nb_tr_steps
    epoch_accu = (n_correct * 100) / nb_tr_examples
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_accu}")


Training loop for the BERT model:

for epoch in range(EPOCHS):