Hi, I am trying to train a 3D U Net. I have a 12 GB titan X pascal :
nvidia-smi
±----------------------------------------------------------------------------+
| NVIDIA-SMI 396.26 Driver Version: 396.26 |
|-------------------------------±---------------------±---------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 TITAN X (Pascal) On | 00000000:42:00.0 Off | N/A |
| 23% 31C P8 17W / 250W | 0MiB / 12196MiB | 0% E. Process |
±------------------------------±---------------------±---------------------+
±----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| No running processes found |
±----------------------------------------------------------------------------+
My trainer script is the following :
from __future__ import print_function, division
import os
import sys
import time
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torch.autograd import Variable
from data_flair import TumorSegmentationDataset
from new_models import unet
# from torchsummary import summary
#torch.cuda.empty_cache()
training_start_time = time.asctime()
startstamp = time.time()
print("\nHostname :" + str(os.getenv("HOSTNAME")))
print("\nStart Time :" + str(training_start_time))
print("\nStart Stamp:" + str(startstamp))
sys.stdout.flush()
sys.stdout.flush()
# Playing with the Dataloader object and setting it up
dataset_train = TumorSegmentationDataset("/cbica/home/bhaleram/comp_space/brats/data/flair_csv.csv")
train_loader = DataLoader(dataset_train,batch_size= 1,shuffle=True, num_workers=4)
print("Training Data : ", len(train_loader.dataset))
# Handle which model was supposed to be used.
model = unet(1,2,30)
sys.stdout.flush()
print("Current Device : ", torch.cuda.current_device())
print("Device Count on Machine : ", torch.cuda.device_count())
# print("Device Name : ", torch.cuda.get_device_name())
print("Cuda Availibility : ", torch.cuda.is_available())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
if device.type == 'cuda':
print('Memory Usage:')
print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3, 1),
'GB')
print('Cached: ', round(torch.cuda.memory_cached(0)/1024**3, 1), 'GB')
sys.stdout.flush()
model.cuda()
# Setting up the optimizer
optimizer = optim.Adam(model.parameters(),
lr= 0.01,
betas=(0.9, 0.99),
weight_decay=0.00005)
sys.stdout.flush()
# Setting up the loss function
def dice_loss(inp, target):
smooth = 1e-7
iflat = inp.view(-1)
tflat = target.view(-1)
intersection = (iflat * tflat).sum()
return 1 - ((2. * intersection + smooth) /
(iflat.sum() + tflat.sum() + smooth))
def dice(inp, target):
smooth = 1e-7
iflat = inp.view(-1)
tflat = target.view(-1)
intersection = (iflat * tflat).sum()
return (2*intersection+smooth)/(iflat.sum()+tflat.sum()+smooth)
loss_list = []
model.train()
# Start reading through the train loader
for batch_idx, (subject) in enumerate(train_loader):
# Load the subject and its ground truth
image = subject['image']
mask = subject['gt']
# Loading images into the GPU and ignoring the affine
image, mask = image.cuda(), mask.cuda()
# I Don't know why I do this step and
# at this point, I am too afraid to ask
image, mask = Variable(image, requires_grad = True), Variable(mask, requires_grad = True)
# Making sure that the optimizer has been reset
optimizer.zero_grad()
# Forward Propagation to get the output from the models
image = image.float()
output = model(image)
# Handling the loss
# Computing the loss function
loss = dice_loss(output.double(), mask.double())
# Back Propagation for model to learn
loss.backward()
optimizer.step()
print(batch_idx)
# Emptying cache to speedup and save space
torch.cuda.empty_cache()
loss_list.append(loss.item())
print(loss.item())
torch.save(model,'/cbica/home/bhaleram/comp_space/brats/model/mod.pt')
My dataloader script :
import nibabel as nib
import torch
from torch.utils.data.dataset import Dataset
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
import os
import random
class TumorSegmentationDataset(Dataset):
def __init__(self, csv_file):
self.df = pd.read_csv(csv_file, header = 0)
def __len__(self):
return len(self.df)
#ignore this function as I am not using it as of now
def transform(self, image, mask):
# Random horizontal flipping
if random.random() > 0.5:
image = np.fliplr(image)
mask = np.fliplr(mask)
# Random vertical flipping
if random.random() > 0.5:
image = np.flipud(image)
mask = np.flipud(mask)
# Add random rotation
if random.random() > 0.5:
image = np.rot90(image, k=1)
mask = np.rot90(mask, k=1)
# Transform to tensor
# image = TF.to_tensor(image)
# mask = TF.to_tensor(mask)
return image, mask
def __getitem__(self, index):
flair_path = self.df.iloc[index, 0]
gt_path = os.path.join(self.df.iloc[index, 1])
gt = nib.load(gt_path)
gt = gt.get_fdata()
gt = gt[0,:,:,:]
image = nib.load(flair_path)
image = image.get_fdata()
image = np.expand_dims(image,axis = 0)
gt = np.expand_dims(gt, axis = 0)
#image, gt = self.transform(image, gt)
#image = np.reshape(image.astype(np.float32), (1, 128, 128, 128))
#gt_data = np.reshape(gt.astype(np.float32), (1, 128, 128, 128))
sample = {'image': image, 'gt' : gt}
return sample
I get the following error when I try running the trainer script :
Hostname :cubic-login1
Start Time :Mon Jul 1 14:14:39 2019
Start Stamp:1562004879.648072
Training Data : 285
Current Device : 0
Device Count on Machine : 1
Cuda Availibility : True
Using device: cuda
Memory Usage:
Allocated: 0.0 GB
Cached: 0.0 GB
Traceback (most recent call last):
File "trainer_flair.py", line 115, in <module>
output = model(image)
File "/cbica/external/python/anaconda/3/envs/pytorch/1.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/cbica/comp_space/bhaleram/brats/new_scripts/new_models.py", line 58, in forward
x = self.out(x, x1)
File "/cbica/external/python/anaconda/3/envs/pytorch/1.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/cbica/comp_space/bhaleram/brats/new_scripts/seg_modules.py", line 434, in forward
x = F.leaky_relu(self.in_3(x))
File "/cbica/external/python/anaconda/3/envs/pytorch/1.0/lib/python3.6/site-packages/torch/nn/functional.py", line 1018, in leaky_relu
result = torch._C._nn.leaky_relu(input, negative_slope)
RuntimeError: CUDA out of memory. Tried to allocate 371.25 MiB (GPU 0; 11.91 GiB total capacity; 11.02 GiB already allocated; 331.06 MiB free; 653.00 KiB cached)
Is this purely because the GPU isn’t large enough or is there something in my script that is causing this problem?
Also, before running this on a 12gb gpu I ran it on a 16gb one, at that time, it ran for 1 iteration and then there was a memory overload. So, is there any way by which I can clear the memory after every iteration. (I can’t always run mt network on the 16 gig GPU because I don’t have access always)
And from the error that I have posted above it seems that the memory is running out in the last layer of the U-Net , i.e. the last convolution operation.
Please do let me know how can I solve this issue.
Thanks!!