Hello, I got a trouble with CUDA memory
I want to slice image input after ‘CUDA out of memory.’ occured.
but, after ‘CUDA out of memory.’ error , MEMORY LEAK occured
It seems like input random tensor “x” at line 79 [at inf class ,run function] didn’t free exactly.
But there’s NO way to free it.
Is there any idea to free it ??
HELP ME…
I tried below but it didn’t work…
del x
x.to(torch.device(‘cpu’)
**SYSTEM & ENV**
GPU : NVIDIA 2080 ti
OS : windows 10 + linux(centos)
pytorch 1.5.1
import torch
from torch import nn
from math import sqrt
# import torch.multiprocessing as mp
import time
def check_gpu(msg='gpu_check'):
print(f'{msg:=^60}')
# print('Memory Usage:')
print(f'Allocated:, {round(torch.cuda.memory_allocated(0)/1024**3,6)}GB')
# # print('Cached: ', round(torch.cuda.memory_cached(0)/1024**3,6), 'GB')
# import gc
# for i, obj in enumerate(gc.get_objects()):
# try:
# if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
# print(f'{i}||{type(obj)}||{obj.size()}')
# except:
# pass
class Conv_ReLU_Block(nn.Module):
def __init__(self):
super(Conv_ReLU_Block, self).__init__()
self.conv = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
return self.relu(self.conv(x))
# resnet network
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.residual_layer = self.make_layer(Conv_ReLU_Block, 18)
self.input = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
self.output = nn.Conv2d(in_channels=64, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False)
self.relu = nn.ReLU(inplace=True)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, sqrt(2. / n))
def make_layer(self, block, num_of_layer):
layers = []
for _ in range(num_of_layer):
layers.append(block())
return nn.Sequential(*layers)
def forward(self, x):
# residual = x
out = self.relu(self.input(x))
out = self.residual_layer(out)
out = self.output(out)
return out
# inference class
class inf:
# init model
def __init__(self):
check_gpu('init')
self.m = Net().to(torch.device('cuda'))
self.m.eval()
# self.m.share_memory()
# run inference with random tensor
# if tensor size (1,1,50000,1024)
# -> CUDA out of memory.
# -> memory leak occured while loop at main
#
# if tensor size (1,1,1024,1024)
# -> great
def run(self):
check_gpu('run start')
try:
with torch.no_grad():
# x = torch.rand(1, 1, 50000, 1024).cuda()
x = torch.rand(1, 1, 1024, 1024, device='cuda')
check_gpu('allocated')
x = self.m(x)
except Exception as e:
print(e)
finally:
torch.cuda.empty_cache()
def main():
inf_ins = inf()
for _ in range(100):
t1 = time.time()
inf_ins.run()
t2 = time.time()
# check_gpu('last')
print(f'time: {t2-t1}')
if __name__ == '__main__':
main()
result if use tensor (1,1,50000,1024) as input tensor
allocated cuda memory rising
============================init============================
Allocated:, 0.0GB
=========================run start==========================
Allocated:, 0.002477GB
=========================allocated==========================
Allocated:, 0.193883GB
CUDA out of memory. Tried to allocate 12.21 GiB (GPU 0; 11.00 GiB total capacity; 198.54 MiB already allocated;
time: 0.3150339126586914
=========================run start==========================
Allocated:, 0.193883GB
=========================allocated==========================
Allocated:, 0.385289GB
CUDA out of memory. Tried to allocate 12.21 GiB (GPU 0; 11.00 GiB total capacity; 394.54 MiB already allocated;
time: 0.31797289848327637
=========================run start==========================
Allocated:, 0.385289GB
=========================allocated==========================
Allocated:, 0.576695GB
CUDA out of memory. Tried to allocate 12.21 GiB (GPU 0; 11.00 GiB total capacity; 590.54 MiB already allocated;
time: 0.31999874114990234
=========================run start==========================
Allocated:, 0.576695GB
=========================allocated==========================
Allocated:, 0.768102GB
CUDA out of memory. Tried to allocate 12.21 GiB (GPU 0; 11.00 GiB total capacity; 786.54 MiB already allocated;
time: 0.3209991455078125
=========================run start==========================
Allocated:, 0.768102GB
=========================allocated==========================
Allocated:, 0.959508GB
CUDA out of memory. Tried to allocate 12.21 GiB (GPU 0; 11.00 GiB total capacity; 982.54 MiB already allocated;
time: 0.31999993324279785
if use (1,1,1024,1024) as input tensor
============================init============================
Allocated:, 0.0GB
=========================run start==========================
Allocated:, 0.002477GB
=========================allocated==========================
Allocated:, 0.006383GB
time: 1.4230003356933594
=========================run start==========================
Allocated:, 0.002477GB
=========================allocated==========================
Allocated:, 0.006383GB
time: 0.14099979400634766
=========================run start==========================
Allocated:, 0.002477GB
=========================allocated==========================
Allocated:, 0.006383GB
time: 0.13899970054626465