DataParallel not using both GPUs equally

Hi everyone, I have a problem with my DataParallel model. I am wrapping my model with DataParallel because I am trying to take advantage of the 2 GPUs I have. However, when I check GPU utilization while training the model, I notice that my model is using only one of the GPUs. I am not sure why this is happening, and I was wondering whether someone knows how to fix this issue.

class SummModel(nn.Module):
    """Score candidate summaries against their source document using BERT.

    Documents, reference summaries and candidate summaries are all encoded
    with the same ``bert-base-uncased`` encoder. The hidden state at
    sequence position 0 is used as the embedding of each sequence, and
    scores are cosine similarities with the document embedding.

    Note:
        ``nn.DataParallel`` splits the inputs along the batch dimension, so
        a batch must contain at least as many samples as there are GPUs for
        every GPU to receive work (a batch size of 1 keeps one GPU idle).
    """

    def __init__(self, hidden_size=768):
        """Load the pretrained encoder.

        Args:
            hidden_size: Width of the encoder's hidden states; used for the
                shape checks and reshapes in ``forward``.
        """
        super().__init__()

        self.hidden_size = hidden_size
        self.encoder = BertModel.from_pretrained('bert-base-uncased')

    def _embed(self, token_ids, pad_id=0):
        """Encode ``token_ids`` and return the hidden state at position 0.

        Args:
            token_ids: 2-D tensor of token ids, one sequence per row.
            pad_id: Id of the padding token; padded positions are masked
                out of the encoder's attention.

        Returns:
            Tensor with one embedding row per input sequence.
        """
        attention_mask = token_ids != pad_id
        # The encoder's first output is the last layer's hidden states.
        last_hidden_state = self.encoder(token_ids, attention_mask=attention_mask)[0]
        return last_hidden_state[:, 0, :]

    def forward(self, text_id, candidate_id, summary_id):
        """Run one forward step of the model.

        Args:
            text_id: Tokenized texts, shape ``(batch, seq_len)``.
            candidate_id: Tokenized candidates, N per text, shape
                ``(batch, N, seq_len)``.
            summary_id: Tokenized reference summaries, shape
                ``(batch, seq_len)``.

        Returns:
            A dict with ``"candidate_scores"`` of shape ``(batch, N)`` and
            ``"summary_score"`` of shape ``(batch,)``; both hold cosine
            similarities with the document embedding.
        """
        batch_size = len(text_id)  # number of tokenized texts
        pad_id = 0  # pad symbol for Bert

        # document embedding
        doc_emb = self._embed(text_id, pad_id)
        assert doc_emb.size() == (batch_size, self.hidden_size)

        # summary embedding
        sum_emb = self._embed(summary_id, pad_id)
        assert sum_emb.size() == (batch_size, self.hidden_size)

        # golden summary score
        summary_score = torch.cosine_similarity(sum_emb, doc_emb, dim=-1)

        # candidate embedding: fold the candidate axis into the batch axis so
        # the encoder sees a plain 2-D batch. ``reshape`` (unlike ``view``)
        # also accepts non-contiguous inputs.
        candidate_num = candidate_id.size(1)
        flat_candidate_id = candidate_id.reshape(-1, candidate_id.size(-1))

        candidate_emb = self._embed(flat_candidate_id, pad_id).view(
            batch_size, candidate_num, self.hidden_size
        )
        assert candidate_emb.size() == (batch_size, candidate_num, self.hidden_size)

        # calculate candidate scores: compare every candidate with the
        # embedding of its own document
        doc_emb = doc_emb.unsqueeze(1).expand_as(candidate_emb)
        candidate_scores = torch.cosine_similarity(candidate_emb, doc_emb, dim=-1)
        assert candidate_scores.size() == (batch_size, candidate_num)

        return {"candidate_scores": candidate_scores, "summary_score": summary_score}

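EDIT: Fixed it by increasing the batch size from 1 to 2. DataParallel splits each batch along the batch dimension across the GPUs, so with a batch size of 1 only one GPU ever received data. :slight_smile: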