Hi, I have a very simple script (shown below), but it seems to cause a memory leak: every iteration increases the reserved GPU memory by about 2 MB until no memory is left and the script crashes. I'm not sure what is happening here, so any help is appreciated. Big thanks in advance! Btw, I'm using PyTorch 1.7.0.
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

torch.autograd.set_detect_anomaly(True)

class DenseLayer(nn.Linear):
    def __init__(self, in_dim: int, out_dim: int, activation: str = "relu", *args, **kwargs) -> None:
        self.activation = activation
        super().__init__(in_dim, out_dim, *args, **kwargs)

    def reset_parameters(self) -> None:
        torch.nn.init.xavier_uniform_(self.weight, gain=torch.nn.init.calculate_gain(self.activation))
        if self.bias is not None:
            torch.nn.init.zeros_(self.bias)

class Geometry(nn.Module):
    def __init__(self, D, W, input_ch, skips):
        super().__init__()
        self.D = D
        self.W = W
        self.input_ch = input_ch
        self.skips = skips
        self.pts_linears = nn.ModuleList(
            [DenseLayer(input_ch, W, activation="relu")]
            + [DenseLayer(W, W, activation="relu") if i not in self.skips
               else DenseLayer(W + input_ch, W, activation="relu")
               for i in range(D - 1)])
        self.outputs_linear = DenseLayer(W, 1 + W, activation="linear")
    def forward(self, input_pts):
        h = input_pts
        for i, l in enumerate(self.pts_linears):
            h = self.pts_linears[i](h)
            h = F.relu(h)
            if i in self.skips:
                h = torch.cat([input_pts, h], -1)
        return self.outputs_linear(h)
    def gradients(self, x):
        # gradient of the first output channel w.r.t. the input points,
        # with create_graph=True so the gradient can itself be part of the loss
        x.requires_grad_(True)
        y = self.forward(x)[..., :1]
        d_output = torch.ones_like(y, requires_grad=False, device=y.device)
        gradients = torch.autograd.grad(
            outputs=y,
            inputs=x,
            grad_outputs=d_output,
            create_graph=True,
            retain_graph=True,
            only_inputs=True)[0]
        return gradients

geo_net = Geometry(8, 256, 3, [4,]).cuda()

for i in range(5000):
    print('cuda mem (MB): ', i, torch.cuda.memory_reserved() / 1e6)
    pts = torch.rand(1024, 3).cuda()
    grad = geo_net.gradients(pts)
    loss = grad.norm(dim=-1).mean()
    loss.backward()
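
In case it helps to narrow things down, here is the stripped-down double-backward pattern that I believe gradients() plus loss.backward() boils down to. This is only a sketch for discussion, not my real setup: the tiny nn.Linear and the names net / pts / grad_pts are just placeholders.

import torch
import torch.nn as nn

net = nn.Linear(3, 1).cuda()

for i in range(10):
    # fresh batch of input points that we differentiate with respect to
    pts = torch.rand(1024, 3, device="cuda", requires_grad=True)
    y = net(pts)
    # first-order gradient w.r.t. the inputs, kept differentiable (create_graph=True)
    (grad_pts,) = torch.autograd.grad(
        outputs=y,
        inputs=pts,
        grad_outputs=torch.ones_like(y),
        create_graph=True,
    )
    loss = grad_pts.norm(dim=-1).mean()
    # second backward pass, through the gradient itself, into the layer's parameters
    loss.backward()
    print(i, torch.cuda.memory_reserved() / 1e6, 'MB reserved')

If this pattern behaves fine in isolation, then I guess the problem is somewhere in my Geometry module or loop rather than in the autograd calls themselves, but I haven't been able to pin it down.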