class BertIntermediate(nn.Module):
    """Linear projection followed by an activation function.

    Args:
        config: Object exposing ``hidden_size``, ``intermediate_size`` and
            ``hidden_act``. When ``hidden_act`` is a string it is used as a
            key into ``ACT2FN``; otherwise it is stored and called directly
            as the activation.
    """

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        # Accept either the name of a registered activation or a callable.
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Apply ``self.dense`` and then the activation to ``hidden_states``."""
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states
How can I free the memory that is allocated after passing the input through hidden_states = self.dense(hidden_states)?
self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
consumes 67 MB for a shape of 1900 x 512.
This memory keeps accumulating every time I pass a new sample for inference.
Is there a way to free this memory? Calling this on every new sample keeps accumulating memory and results in a memory leak.
@ptrblck
I cannot reproduce any memory leak and see the expected memory usage:
import torch
import torch.nn as nn
class BertIntermediate(nn.Module):
    """Minimal stand-in for the layer: ``Linear(1024, 1024)`` followed by ReLU.

    ``forward`` prints ``torch.cuda.memory_allocated()`` (converted to MB)
    before the linear layer, after it, and after the activation, so the
    allocation at each step can be inspected.
    """

    def __init__(self):
        super().__init__()
        self.dense = nn.Linear(1024, 1024)
        self.intermediate_act_fn = nn.ReLU()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer and ReLU, printing allocated CUDA memory at each step."""
        print("start of forward, {:.3f}MB allocated".format(torch.cuda.memory_allocated()/1024**2))
        hidden_states = self.dense(hidden_states)
        print("after self.dense, {:.3f}MB allocated".format(torch.cuda.memory_allocated()/1024**2))
        hidden_states = self.intermediate_act_fn(hidden_states)
        print("after act, {:.3f}MB allocated".format(torch.cuda.memory_allocated()/1024**2))
        return hidden_states
# Reproduction script: run the same input through the model three times
# under torch.no_grad() and print the allocated CUDA memory after each pass.
# The trailing "# ..." lines record the output observed by the author.
print("start, {:.3f}MB allocated".format(torch.cuda.memory_allocated()/1024**2))
# start, 0.000MB allocated

device = "cuda"
model = BertIntermediate().to(device)
x = torch.randn(1024, 1024, device=device)
print("after setup, {:.3f}MB allocated".format(torch.cuda.memory_allocated()/1024**2))
# after setup, 8.004MB allocated

# First pass.
with torch.no_grad():
    out = model(x)
print("after forward, {:.3f}MB allocated".format(torch.cuda.memory_allocated()/1024**2))
# start of forward, 8.004MB allocated
# after self.dense, 20.129MB allocated
# after act, 20.129MB allocated
# after forward, 20.129MB allocated

# Second pass: the value reported after the forward is the same as after the
# first pass (20.129MB), i.e. no growth between calls.
# NOTE(review): the temporary 24.129MB inside forward is presumably the new
# output being allocated while the previous `out` is still referenced — confirm.
with torch.no_grad():
    out = model(x)
print("after forward, {:.3f}MB allocated".format(torch.cuda.memory_allocated()/1024**2))
# start of forward, 20.129MB allocated
# after self.dense, 24.129MB allocated
# after act, 24.129MB allocated
# after forward, 20.129MB allocated

# Third pass: identical numbers to the second pass.
with torch.no_grad():
    out = model(x)
print("after forward, {:.3f}MB allocated".format(torch.cuda.memory_allocated()/1024**2))
# start of forward, 20.129MB allocated
# after self.dense, 24.129MB allocated
# after act, 24.129MB allocated
# after forward, 20.129MB allocated
I mean the virtual memory, not the CUDA memory. @ptrblck