Hey guys, I am facing a problem with a custom network of mine. It is basically a multi-layer perceptron, but for each layer weight matrix W, I factorise its flattened version as vec(W) = Uv, where U is a given matrix and I only want to learn v.
I want to do multidimensional regression, so I am using the usual mse loss. During training I get the following error:
“Trying to backward through the graph a second time, but the saved intermediate results have already been freed. Specify retain_graph=True when calling backward the first time.”
If I pass retain_graph=True it runs OK, but I see no reason why I should get this error in the first place: I am only backpropagating once per forward pass. Any ideas on why this might occur?
Here is my code for the layer and the network. The details about how I create the matrix U are not important.
class InvariantLinear(torch.nn.Module):
    """Linear layer whose flattened weight is factorised as vec(W) = U @ v.

    U is a fixed mixing of two constant bases (``U_conv``, ``U_lin``)
    interpolated by a learnable scalar ``pi``; only ``v``, ``pi`` and
    (optionally) the bias are learned.

    Args:
        in_features: size of each input sample.
        out_features: size of each output sample.
        filter_dim: dimensionality of the learnable factor ``v``.
        bias: if True, add a learnable bias of shape ``(out_features,)``.
    """

    def __init__(self, in_features, out_features, filter_dim, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.filter_dim = filter_dim
        # Learnable factor: vec(W) = U @ v.
        self.v = torch.nn.Parameter(torch.randn(filter_dim))
        # BUG FIX: the original assigned the ``bias`` flag to self.bias and
        # then immediately overwrote it with a Parameter, so bias=False was
        # silently ignored. Honour the flag instead.
        if bias:
            self.bias = torch.nn.Parameter(torch.randn(out_features))
        else:
            self.register_parameter("bias", None)
        # Learnable mixing weight between the two fixed bases.
        self.pi = torch.nn.Parameter(torch.tensor(1 / 2))

        l = int(self.in_features * self.out_features)
        # BUG FIX: build the bases with zeros — torch.empty left
        # uninitialised garbage in any trailing rows whenever filter_dim
        # does not divide l exactly.
        U_conv = torch.zeros(l, filter_dim)
        U_lin = torch.zeros(l, filter_dim)
        eye = torch.eye(filter_dim)
        for i in range(l // filter_dim):
            rows = slice(filter_dim * i, filter_dim * (i + 1))
            U_lin[rows, :] = eye
            # U_conv keeps only every other identity block; odd-indexed
            # blocks stay zero (same pattern as the original code).
            if i % 2 == 0:
                U_conv[rows, :] = eye
        # Register as buffers so they move with .to(device)/.to(dtype) and
        # are saved in state_dict; plain attributes are left behind on CPU.
        self.register_buffer("U_conv", U_conv)
        self.register_buffer("U_lin", U_lin)

    def forward(self, x):
        """Compute ``x @ W.T (+ bias)`` with ``vec(W) = (pi*U_conv + (1-pi)*U_lin) @ v``."""
        U = self.pi * self.U_conv + (1 - self.pi) * self.U_lin
        weight = torch.matmul(U, self.v).view(self.out_features, self.in_features)
        output = x @ weight.t()
        if self.bias is not None:
            output = output + self.bias
        return output
class Densenet(torch.nn.Module):
    """MLP built from ``InvariantLinear`` layers with ReLU activations.

    Args:
        input_size: dimensionality of the network input.
        output_size: dimensionality of the network output.
        filter_dim: ``filter_dim`` forwarded to every ``InvariantLinear``.
        width: hidden width of the network.
        num_layers: number of hidden layers. BUG FIX: the original accepted
            this argument but ignored it and always built exactly two hidden
            layers; ``num_layers=2`` reproduces the original architecture.
    """

    def __init__(self, input_size, output_size, filter_dim, width, num_layers):
        super().__init__()
        self.input_layer = InvariantLinear(input_size, width, filter_dim)
        # ModuleList (not a plain Python list) so the hidden layers are
        # registered as submodules and their parameters are trained.
        self.hidden_layers = torch.nn.ModuleList(
            InvariantLinear(width, width, filter_dim) for _ in range(num_layers)
        )
        self.output_layer = InvariantLinear(width, output_size, filter_dim)
        self.act = torch.nn.ReLU()

    def forward(self, x):
        """Forward pass: input layer, ``num_layers`` hidden layers, output layer."""
        h = self.act(self.input_layer(x))
        for layer in self.hidden_layers:
            h = self.act(layer(h))
        return self.output_layer(h)