Can't find the in-place operation that messes up backward()

I get this error:

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [5, 100]] is at version 1; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

I can't find what exactly is wrong.
Here is the stack trace:

C:\Users\matty\PycharmProjects\my-proto-tc\proto_net.py:82: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  sentence_tensors.append(torch.tensor(encoded_support_set[i][j].clone().detach().requires_grad_(True)))
Warning: Traceback of forward call that caused the error:
  File "C:/Users/matty/PycharmProjects/my-proto-tc/train.py", line 29, in <module>
    outputs = model.forward(x_in, x_ood, support_set, labels)
  File "C:\Users\matty\PycharmProjects\my-proto-tc\proto_net.py", line 89, in forward
    dists = self.distance_function(x_in, prototypes)
  File "C:\Users\matty\PycharmProjects\my-proto-tc\proto_net.py", line 47, in cosine_similarity
    return F.cosine_similarity(x, y)
 (print_stack at ..\torch\csrc\autograd\python_anomaly_mode.cpp:57)

I can provide the full code if necessary.

Yes, please provide an (executable) code snippet so that we can take a look.

import torch
from torch import nn
import torch.nn.functional as F

ALPHA = 10
BETTA = 1
GAMMA = 1.0
M_1 = 0.4
M_2 = 0.8


class CNNEncoder(nn.Module):
    def __init__(self, embed_dim: int, vocab_size: int):
        super(CNNEncoder, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.conv1 = nn.Conv2d(1, 100, (2, embed_dim))

    def forward(self, x):
        x = self.embed(x)                            # (seq_len, embed_dim)
        x = x.unsqueeze(0)
        x = x.unsqueeze(0)                           # (1, 1, seq_len, embed_dim)
        x = self.conv1(x)
        x = x.squeeze(3)                             # (1, 100, seq_len - 1)
        x = torch.tanh(x)
        x = F.avg_pool1d(x, x.size()[2]).squeeze(2)  # (1, 100)
        return x


def cosine_similarity(x, y):
    return F.cosine_similarity(x, y)


class ProtoNet(nn.Module):
    def __init__(self):
        super(ProtoNet, self).__init__()

    def forward(self, x_in, x_ood, support_set, labels):
        # one prototype per class: mean over that class's sentence encodings
        prototypes = support_set.mean(1)
        return prototypes


class OProtoNet(nn.Module):
    def __init__(self, cnn_encoder: CNNEncoder, distance_function=cosine_similarity):
        super(OProtoNet, self).__init__()
        self.cnn_encoder = cnn_encoder
        self.proto_network = ProtoNet()
        self.distance_function = distance_function

    def forward(self, x_in, x_ood, support_set, labels):
        x_in = self.cnn_encoder.forward(x_in)
        x_ood = self.cnn_encoder.forward(x_ood)

        encoded_support_set = []

        # encode every sentence of every class in the support set
        for class_row in support_set.split(1):
            encoded_sents = []
            for sentence in class_row.squeeze_().split(1):
                encoded_sents.append(self.cnn_encoder.forward(sentence.squeeze_()))
            encoded_support_set.append(encoded_sents)

        # rebuild the encodings as one (num_classes, num_sentences, 100) tensor
        class_tensors = []
        for i in range(support_set.shape[0]):
            sentence_tensors = []
            for j in range(support_set.shape[1]):
                # this is the line the UserWarning in the trace points at
                sentence_tensors.append(torch.tensor(encoded_support_set[i][j].clone().detach().requires_grad_(True)))
            class_tensors.append(torch.cat(sentence_tensors))

        support_set = torch.stack(class_tensors)

        prototypes = self.proto_network.forward(x_in, x_ood, support_set, labels)

        dists = self.distance_function(x_in, prototypes)
        log_p_y = F.log_softmax(-dists * ALPHA, dim=0)

        # in-domain loss: negative log-probability of the ground-truth class
        loss_in = -log_p_y[0]

        # out-of-domain loss: hinge on the largest OOD-to-prototype similarity
        max_dist = torch.max(self.distance_function(x_ood, prototypes) - M_1)

        zero_tensor = torch.FloatTensor([0.0])

        loss_ood = torch.max(max_dist, zero_tensor)

        # margin loss against the ground-truth (first) prototype
        dist = M_2 - self.distance_function(x_in, prototypes[0].unsqueeze_(0))

        loss_gt = torch.max(zero_tensor.expand_as(dist), dist)

        return loss_in + BETTA * loss_ood.item() + GAMMA * loss_gt.item()

Thanks for the code!
Could you post some dummy input tensors with the right shape and type to run the code?

Sure

from torch import optim

cnn = CNNEncoder(300, 200000)
model = OProtoNet(cnn)

torch.autograd.set_detect_anomaly(True)
optimizer = optim.Adam(model.parameters())

# random token ids inside the vocab range; an uninitialized LongTensor can
# hold out-of-range indices and crash the embedding lookup
x_in = torch.randint(0, 200000, (50,))
x_ood = torch.randint(0, 200000, (50,))
support_set = torch.randint(0, 200000, (5, 20, 50))
labels = torch.arange(0, 6)

optimizer.zero_grad()
outputs = model.forward(x_in, x_ood, support_set, labels)
outputs.backward()
optimizer.step()

Thanks for the code.
The offending call should be the in-place unsqueeze_ of prototypes[0].
Change it to this and it should work:

dist = M_2 - self.distance_function(x_in, prototypes[0].unsqueeze(0))
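
For context: F.cosine_similarity saves its inputs for the backward pass, and every in-place op bumps a tensor's version counter. prototypes[0] is a view that shares its counter with prototypes (the [5, 100] tensor named in the error), so unsqueeze_ moved prototypes to version 1 after autograd had saved it at version 0, and backward() noticed the mismatch. The out-of-place unsqueeze just returns a new view and leaves prototypes untouched. Here is a minimal sketch of the same failure mode, independent of your code (just core torch, throwaway names):

import torch

x = torch.ones(3, requires_grad=True)
y = torch.exp(x)    # exp saves its output y for the backward pass
y.mul_(2)           # any in-place op bumps y's version counter: 0 -> 1
y.sum().backward()  # RuntimeError: ... is at version 1; expected version 0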

It worked, thanks a lot!
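
One more tip for anyone who lands here later: in PyTorch, methods ending in an underscore (unsqueeze_, squeeze_, mul_, ...) operate in place, and each call bumps the internal version counter the error message talks about. You can watch the counter directly; _version is an internal attribute, so treat this purely as a debugging sketch:

import torch

p = torch.randn(5, 100)
print(p._version)   # 0
p[0].unsqueeze_(0)  # an in-place op through a view bumps the base's counter too
print(p._version)   # 1 -- the "is at version 1" from the error above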