Out of memory error

I’m constantly getting
RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1512386481460/work/torch/lib/THC/generic/THCTensorMathPairwise.cu:102

I’ve decreased batch_size to 2 and the number of LSTM layers to 2, and I still get this error.
I’m trying to implement siamese model with a custom loss.
My model is defined as follows:

class Model(nn.Module):
    """Siamese network: a shared (embedding -> BiLSTM -> MLP) encoder applied
    to two inputs, returning the pair of encodings for a contrastive loss.

    The embedding table is initialized from pretrained vectors (``ru.vectors``)
    and frozen.
    """

    def __init__(self):
        # BUG FIX: the original called super(SiameseNetwork, self).__init__(),
        # but this class is named Model — that raises a NameError (or binds the
        # wrong class) as soon as the model is constructed.
        super(Model, self).__init__()
        # Frozen pretrained embeddings; padding_idx=2 marks the pad token.
        self.embedding = nn.Embedding(ru.vectors.size(0), ru.vectors.size(1), padding_idx=2)
        self.embedding.weight = nn.Parameter(ru.vectors)
        self.embedding.weight.requires_grad = False
        # 2-layer bidirectional LSTM, 300 hidden units per direction.
        self.lstm = nn.LSTM(300, 300, 2, batch_first=True, bidirectional=True, dropout=.05)
        self.dropout = nn.Dropout(p=.2)

        # NOTE(review): 300*2*300 assumes a fixed sequence length of 300
        # (seq_len * num_directions * hidden) — confirm against the data
        # pipeline. This first Linear alone holds 180000*1500 weights; with
        # fp32 parameters, gradients and optimizer state it is the most likely
        # source of the reported CUDA OOM. Consider pooling over the sequence
        # (e.g. taking the final LSTM state) instead of flattening it.
        self.fc1 = nn.Sequential(
            nn.Linear(300 * 2 * 300, 1500),
            nn.Linear(1500, 1000),
            nn.Linear(1000, 500),
            nn.Linear(500, 5))

    def forward_once(self, x):
        """Encode one batch of token-id sequences into one 5-d vector each."""
        output = self.embedding(x)
        output, _ = self.lstm(output)
        output = self.dropout(output)
        # Flatten (batch, seq, 2*hidden) -> (batch, seq*2*hidden) for the MLP.
        output = output.view(output.size(0), -1)
        output = self.fc1(output)
        return output

    def forward(self, input1, input2):
        """Run both inputs through the shared encoder; return both encodings."""
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        return output1, output2

And loss:

class ContrastiveLoss(torch.nn.Module):
    """Contrastive loss (Hadsell, Chopra & LeCun, 2006).

    Pulls similar pairs (label 0) together and pushes dissimilar pairs
    (label 1) apart up to ``margin``:

        L = mean((1 - y) * d^2 + y * max(margin - d, 0)^2)

    where d is the Euclidean distance between the two embeddings.
    Based on: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    BUG FIX: the original had this description as bare, unquoted text in the
    class body, which is a SyntaxError; it is now a proper docstring.
    """

    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        """Compute the loss; ``label`` is 0 for similar pairs, 1 for dissimilar."""
        euclidean_distance = F.pairwise_distance(output1, output2)
        loss_contrastive = torch.mean((1 - label) * torch.pow(euclidean_distance, 2) +
                                      label * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))

        return loss_contrastive

I’m using pretrained fastText vectors as the embedding matrix — roughly 1.5 million rows × 300 dimensions.

Can anyone help me please :frowning:

I can’t fix it. The error appears after the first batch, during backpropagation of the loss on the second batch, no matter how large or small the batch size and number of workers are.