Here is the full error message (it's long). In this example, the code trips on BatchNorm when it tries to execute preds = torch.sigmoid(model(X_i)) in the check_accuracy function I posted previously; I have also seen it trip on Conv2d:
OutOfMemoryError                          Traceback (most recent call last)
d:\rsna-2023-abdominal-trauma-detection\test.py in line 5
      1 import train
      3 if __name__ == '__main__':
----> 5     train.main()

File d:\rsna-2023-abdominal-trauma-detection\train.py:116, in main()
    113 save_checkpoint(checkpoint)
    115 # check accuracy/DICE
--> 116 check_accuracy(val_loader, model, device=config.DEVICE)
    118 save_predictions_as_imgs(val_loader, model, folder='saved_images/', device=config.DEVICE)

File d:\rsna-2023-abdominal-trauma-detection\utils.py:89, in check_accuracy(loader, model, device)
     87 y_i = y.squeeze(0).int().to(device)
     88 print('After data allocation', torch.cuda.memory_allocated())
---> 89 preds = torch.sigmoid(model(X_i))
     90 print('After evaluation', torch.cuda.memory_allocated())
     91 preds = (preds > 0.5).float()

File c:\Users\probi\anaconda3\envs\kaggle\lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File d:\rsna-2023-abdominal-trauma-detection\models.py:43, in UNET25D.forward(self, x)
     41 skip_connections = []
     42 for down_block in self.downsampling:
---> 43     x = down_block(x)
     44     skip_connections.append(x)
     45     x = self.pooling(x)

File c:\Users\probi\anaconda3\envs\kaggle\lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File d:\rsna-2023-abdominal-trauma-detection\models.py:16, in DoubleConv.forward(self, x)
     15 def forward(self, x):
---> 16     return self.conv_block(x)

File c:\Users\probi\anaconda3\envs\kaggle\lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File c:\Users\probi\anaconda3\envs\kaggle\lib\site-packages\torch\nn\modules\container.py:217, in Sequential.forward(self, input)
    215 def forward(self, input):
    216     for module in self:
--> 217         input = module(input)
    218     return input

File c:\Users\probi\anaconda3\envs\kaggle\lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File c:\Users\probi\anaconda3\envs\kaggle\lib\site-packages\torch\nn\modules\batchnorm.py:171, in _BatchNorm.forward(self, input)
    164 bn_training = (self.running_mean is None) and (self.running_var is None)
    166 r"""
    167 Buffers are only updated if they are to be tracked and we are in training mode. Thus they only need to be
    168 passed when the update should occur (i.e. in training mode when they are tracked), or when buffer stats are
    169 used for normalization (i.e. in eval mode when buffers are not None).
    170 """
--> 171 return F.batch_norm(
    172     input,
    173     # If buffers are not to be tracked, ensure that they won't be updated
    174     self.running_mean
    175     if not self.training or self.track_running_stats
    176     else None,
    177     self.running_var if not self.training or self.track_running_stats else None,
    178     self.weight,
    179     self.bias,
    180     bn_training,
    181     exponential_average_factor,
    182     self.eps,
    183 )

File c:\Users\probi\anaconda3\envs\kaggle\lib\site-packages\torch\nn\functional.py:2450, in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
   2447 if training:
   2448     _verify_batch_size(input.size())
-> 2450 return torch.batch_norm(
   2451     input, weight, bias, running_mean, running_var, training, momentum, eps, torch.backends.cudnn.enabled
   2452 )

OutOfMemoryError: CUDA out of memory. Tried to allocate 9.00 GiB (GPU 0; 15.99 GiB total capacity; 10.03 GiB already allocated; 3.23 GiB free; 10.38 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
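To put the 9.00 GiB request in perspective, here is the back-of-the-envelope activation math I have been working with (a rough sketch only; the batch and spatial sizes below are placeholders, not necessarily the sizes of my actual slices):

# Rough fp32 size of one conv/BatchNorm output in the first DoubleConv.
# Placeholder sizes, for illustration only:
batch, channels, height, width = 4, 64, 512, 512
bytes_per_element = 4  # float32
activation_bytes = batch * channels * height * width * bytes_per_element
print(f'{activation_bytes / 2**30:.2f} GiB per stored activation')  # 0.25 GiB

A single 9 GiB request therefore implies a much larger spatial size or effective batch than these placeholder numbers.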
My UNet implementation:
import torch
import torch.nn as nn


class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        # Two 3x3 convolutions; bias is disabled because BatchNorm follows.
        self.conv_block = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True))

    def forward(self, x):
        return self.conv_block(x)


class UNET25D(nn.Module):
    def __init__(self, in_channels=3, out_channels=1, features=[64, 128, 256, 512]):
        super(UNET25D, self).__init__()
        self.upsampling = nn.ModuleList()
        self.downsampling = nn.ModuleList()
        self.pooling = nn.MaxPool2d(kernel_size=2, stride=2)

        # Downsampling path
        for feature in features:
            self.downsampling.append(DoubleConv(in_channels, feature))
            in_channels = feature

        # Upsampling path: a transposed conv followed by a DoubleConv per level
        for feature in reversed(features):
            self.upsampling.append(
                nn.ConvTranspose2d(feature*2, feature, kernel_size=2, stride=2)
            )
            self.upsampling.append(DoubleConv(feature*2, feature))

        self.bottleneck = DoubleConv(features[-1], features[-1]*2)
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        skip_connections = []
        for down_block in self.downsampling:
            x = down_block(x)
            skip_connections.append(x)
            x = self.pooling(x)

        x = self.bottleneck(x)
        skip_connections = skip_connections[::-1]

        # Input H and W must be divisible by 16 for the skip shapes to match.
        for index in range(0, len(self.upsampling), 2):
            x = self.upsampling[index](x)
            skip_connection = skip_connections[index//2]
            concat_skip = torch.cat((skip_connection, x), dim=1)
            x = self.upsampling[index + 1](concat_skip)

        return self.final_conv(x)
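As a quick sanity check on shapes (and to rule out a wiring bug), I run something like the following; the 512x512 input is only an example size that satisfies the divisible-by-16 constraint noted above:

if __name__ == '__main__':
    model = UNET25D(in_channels=3, out_channels=1)
    x = torch.randn(4, 3, 512, 512)  # example mini-batch of 4 slices
    with torch.no_grad():            # shape check only, no autograd graph
        out = model(x)
    print(out.shape)                 # torch.Size([4, 1, 512, 512])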
Finally, my train function:
def train(loader, model, optimizer, loss_fn, scaler):
    loop = tqdm(loader)
    for batch_idx, (data, targets) in enumerate(loop):
        print('Batch ', batch_idx)
        # Drop the DataLoader's leading batch dimension; the slices are
        # re-batched manually below.
        data = data.squeeze(0)
        targets = targets.squeeze(0)

        BATCH_SIZE = 4
        mini_batches = math.ceil(data.size(0) / BATCH_SIZE)
        for i in range(mini_batches):
            mb_from = i * BATCH_SIZE
            mb_to = min((i + 1) * BATCH_SIZE, data.size(0))
            X = data[mb_from:mb_to].float().to(device=config.DEVICE, non_blocking=True)
            y = targets[mb_from:mb_to].int().to(device=config.DEVICE, non_blocking=True)

            # Forward prop under mixed precision
            with torch.cuda.amp.autocast(dtype=torch.float16):
                predictions = model(X)
                loss = loss_fn(predictions, y)

            del X
            del y
            gc.collect()
            torch.cuda.empty_cache()

            # Backward prop
            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            # Update tqdm
            loop.set_postfix(loss=loss.item())
            loss.detach()
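For completeness, this is roughly how train is driven from main (a paraphrase from memory, not the exact code; config.LEARNING_RATE, config.NUM_EPOCHS, and train_loader are assumed names):

import torch.nn.functional as F

def main():
    model = UNET25D(in_channels=3, out_channels=1).to(config.DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.LEARNING_RATE)
    # The mask arrives as int, so cast it to float for BCE-with-logits.
    loss_fn = lambda preds, target: F.binary_cross_entropy_with_logits(preds, target.float())
    scaler = torch.cuda.amp.GradScaler()  # pairs with the autocast block in train
    for epoch in range(config.NUM_EPOCHS):
        train(train_loader, model, optimizer, loss_fn, scaler)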