Hi,
I have a pytorch module, whose intermediate variables occupy a lot of memory.
To avoid a "CUDA out of memory" error, I've deleted one of the intermediate tensors, `dist`, via `del dist` to free up GPU memory. I was wondering whether deleting this tensor in this way also deletes or corrupts the corresponding portion of the computational graph that autograd builds up during the forward pass?
class E_step(nn.Module):
    """Evaluate normalized log-responsibilities from current parameter estimates.

    For every feature vector the module measures its Euclidean distance to
    each of the K component means, turns that into an unnormalized log-score,
    adds the log mixing weights, and normalizes over the K components with
    ``logsumexp``.
    """

    def __init__(self, std):
        """Store ``std``, the scalar used in the ``2 * pi * std**2`` term.

        Presumably the shared standard deviation of the components --
        confirm against the caller.
        """
        super().__init__()
        self.std = std

    def forward(self, pi, mean, features):
        """Compute the log-posteriors over the K components.

        Args:
            pi: mixing weights, tensor of shape ``[N, K]``.
            mean: component means, tensor of shape ``[N, K, F]``.
            features: tensor of shape ``[D, N, F]``.
                NOTE(review): the original comment listed ``[N, D, F]``, but
                ``mean[:, k, :] - features``, ``+ pi.log()`` and the final
                ``transpose(0, 1)`` only broadcast in general (and only yield
                the documented ``[N, D, K]`` result) for ``[D, N, F]``.
                Confirm against the caller.

        Returns:
            log_posteriors: tensor of shape ``[N, D, K]``; ``logsumexp`` over
            the last dimension is zero.
        """
        num_components = mean.shape[1]
        num_rows, num_cols, num_feats = features.shape

        # Allocate on the same device/dtype as ``features`` rather than
        # hard-coding ``torch.cuda.FloatTensor``, so the module also runs on
        # CPU or with other floating-point dtypes.
        dist = features.new_empty((num_rows, num_cols, num_components))
        for k in range(num_components):
            # One component at a time keeps the temporary at the size of
            # ``features`` instead of K times that size.
            dist[:, :, k] = torch.norm(mean[:, k, :] - features, p=2, dim=2)

        # NOTE(review): the original line had unbalanced parentheses. This is
        # the minimal reconstruction: -0.5 * F * log(2*pi*std^2 + dist) + log(pi).
        # Confirm that this is the intended likelihood term.
        scale = 2 * math.pi * self.std ** 2
        log_probs_plus_log_pi = (
            -0.5 * num_feats * (dist + scale).log() + pi.log()
        )

        # ``del`` only drops this local name. Tensors that autograd saved for
        # the backward pass are kept alive by the graph itself, so removing
        # the name does not change the gradients.
        del dist

        log_probs_plus_log_pi = log_probs_plus_log_pi.transpose(0, 1)
        return log_probs_plus_log_pi - torch.logsumexp(
            log_probs_plus_log_pi, dim=2, keepdim=True
        )