Is there a way to handle memory appropriately?

I’m dealing with MRI data, which I converted to numpy files and saved.
My data consists of inputs: [3600, 512, 512] (N, H, W), ~1.8G, and masks: [3600, 8, 512, 512] (N, classes, H, W), ~28.125G.
I’m using a U-Net for segmentation; my GPU is an NVIDIA GeForce RTX 2080 Ti and my machine has 18G of RAM.

Data has been loaded as follows:

class trainDataset(torch.utils.data.Dataset):
    def __init__(self, data, target, transform=None):
        self.data = data.astype(np.float32)
        self.data = normalize(self.data)  # normalize the float32 copy, not the original array
        self.target = target.astype(np.float32)
        self.transform = transform

    def __getitem__(self, index):
        x = self.data[index]
        y = self.target[index]

        if self.transform:
            x = self.transform(x)

        return x, y

    def __len__(self):
        return len(self.data)
numpy_data = np.load(image_path + 'MRtrain.npy')
numpy_target = np.load(mask_path + 'RStrain.npy')

traindataset = trainDataset(numpy_data, numpy_target, transform=transform)

trainloader = torch.utils.data.DataLoader(traindataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=False)

and the training loop is as follows:

def fit(epoch, model, data_loader, phase='train', volatile=False):
    if phase == 'train':
        exp_lr_scheduler.step()  # note: since PyTorch 1.1 the scheduler should be stepped after the optimizer steps of the epoch
        model.train()
    if phase == 'valid':
        model.eval()
    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(data_loader):
        # move the batch straight to the GPU; the intermediate .cpu() calls were redundant
        if is_cuda:
            inputs, target = inputs.cuda(), target.cuda()
        # Variable is deprecated; plain tensors work directly
        if phase == 'train':
            optimizer.zero_grad()

        output = model(inputs)
        pred = torch.sigmoid(output)
        loss = dice(pred, target)

        running_loss += loss.item()  # .item() detaches the value, so no graph is kept alive

        if phase == 'train':
            loss.backward()
            optimizer.step()

    loss = running_loss / len(data_loader.dataset)

    print('{} Dice_Loss: {:.4f}'.format(phase, loss))
    return loss
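
For context, the epoch loop that calls fit (reconstructed from the traceback further down, so the names match it) looks roughly like this:

train_losses, val_losses = [], []
for epoch in range(num_epochs):  # num_epochs assumed defined elsewhere
    print('-' * 10)
    epoch_loss = fit(epoch, model, trainloader, phase='train')
    val_epoch_loss = fit(epoch, model, validloader, phase='valid')
    train_losses.append(epoch_loss)
    val_losses.append(val_epoch_loss)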

Training at the 512x512 size causes a CUDA out-of-memory error.
I suspect handling the data as one big numpy file is inefficient; is there a better way?

I assume you are running out of memory, or are you really seeing a MemoryError?
In the former case, could you try to lower the number of kernels in your U-Net and check the memory usage?
The OOM shouldn’t be related to loading the data as numpy arrays.
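
If host RAM (rather than GPU memory) ever becomes the bottleneck, one option is to memory-map the .npy files instead of loading them fully. This is only a sketch, assuming the file layout from your post; normalizing per sample inside __getitem__ differs from your global normalization, so the global max is precomputed here:

import numpy as np
import torch

class LazyDataset(torch.utils.data.Dataset):
    def __init__(self, data_path, target_path, transform=None):
        # mmap_mode='r' keeps the arrays on disk; only indexed slices are read into RAM
        self.data = np.load(data_path, mmap_mode='r')
        self.target = np.load(target_path, mmap_mode='r')
        self.data_max = float(self.data.max())  # one pass over the file for the global max
        self.transform = transform

    def __getitem__(self, index):
        # np.asarray copies just this slice and makes it writable
        x = np.asarray(self.data[index], dtype=np.float32) / self.data_max
        y = np.asarray(self.target[index], dtype=np.float32)
        if self.transform:
            x = self.transform(x)
        return x, y

    def __len__(self):
        return len(self.data)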

PS: based on the target shape, I assume you are dealing with a multi-label classification, i.e. each pixel might correspond to multiple classes?
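
For such a multi-label setup, nn.BCEWithLogitsLoss with float targets of the same shape as the logits is the usual choice. A minimal sketch with assumed shapes (not your exact model):

import torch
import torch.nn as nn

criterion = nn.BCEWithLogitsLoss()  # applies the sigmoid internally, so pass raw logits

logits = torch.randn(4, 8, 512, 512)                     # [batch_size, classes, H, W]
target = torch.randint(0, 2, (4, 8, 512, 512)).float()   # each pixel can be active in several channels

loss = criterion(logits, target)
print(loss.item())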

I got the error message below.
Memory usage is about 97% (14.8G)
and GPU memory usage is about 6.8G:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-10-f1bcb8a65b63> in <module>
     18     print('-' * 10)
     19     epoch_loss = fit(epoch,model,trainloader,phase='train')
---> 20     val_epoch_loss = fit(epoch,model,validloader,phase='valid')
     21     train_losses.append(epoch_loss)
     22     val_losses.append(val_epoch_loss)

<ipython-input-8-cb42f6cac88b> in fit(epoch, model, data_loader, phase, volatile)
     14             optimizer.zero_grad()
     15 
---> 16         output = model(inputs)
     17         pred = torch.sigmoid(output)
     18         loss = dice(pred,target)

C:\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

<ipython-input-5-ef5c656e99e9> in forward(self, x)
     45 
     46         x = self.dconv_up3(x)
---> 47         x = self.upsample(x)
     48         x = torch.cat([x, conv2], dim=1)
     49 

C:\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

C:\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\upsampling.py in forward(self, input)
    129     @weak_script_method
    130     def forward(self, input):
--> 131         return F.interpolate(input, self.size, self.scale_factor, self.mode, self.align_corners)
    132 
    133     def extra_repr(self):

C:\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\functional.py in interpolate(input, size, scale_factor, mode, align_corners)
   2561         raise NotImplementedError("Got 4D input, but linear mode needs 3D input")
   2562     elif input.dim() == 4 and mode == 'bilinear':
-> 2563         return torch._C._nn.upsample_bilinear2d(input, _output_size(2), align_corners)
   2564     elif input.dim() == 4 and mode == 'trilinear':
   2565         raise NotImplementedError("Got 4D input, but trilinear mode needs 5D input")

RuntimeError: CUDA out of memory. Tried to allocate 256.00 MiB (GPU 0; 11.00 GiB total capacity; 8.19 GiB already allocated; 174.59 MiB free; 96.81 MiB cached)

As you advised, lowering the number of kernels in my U-Net worked! Thanks!
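
For reference, “lowering the number of kernels” means reducing the out_channels of the conv layers. A hypothetical double-conv block with a configurable width (not the exact model from this thread):

import torch.nn as nn

def double_conv(in_ch, out_ch):
    # two 3x3 convs per stage, as commonly used in U-Net encoders/decoders
    return nn.Sequential(
        nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
    )

base = 32  # halving this halves the activation memory of each stage
enc1 = double_conv(1, base)        # single-channel MRI input
enc2 = double_conv(base, base * 2)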

I was doing multi-organ classification, but now I want to classify a single organ.
My numpy file shapes are image: [3600, 512, 512] and mask: [3600, 512, 512].
Split into batches, the input tensors are image: [4, 512, 512] and mask: [4, 512, 512],
but the output tensor is [4, 1, 512, 512].
I think there should be 2 classes (background + organ), but I don’t know how to include the background as a mask class.
Do you have a good idea?

Good to hear it’s working now.

Is the input shape defined as [batch_size, height, width] or [channels, height, width]?
In either case, you should pass inputs to conv layers as [batch_size, channels, height, width].
If you are dealing with single-channel images, just unsqueeze dim 1:

input = input.unsqueeze(1)

For nn.CrossEntropyLoss (and nn.NLLLoss), the target should contain class indices in the shape [batch_size, height, width].
I assume your target already contains the background class as some class index (e.g. class 0)?
If so, just map all other classes to the background class:

background_class_index = 0
desired_class_index = 1
target[target != desired_class_index] = background_class_index
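
To make the expected shapes concrete, a minimal sketch for a two-class case (sizes assumed from your post):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

output = torch.randn(4, 2, 512, 512)          # logits: [batch_size, num_classes, H, W]
target = torch.randint(0, 2, (4, 512, 512))   # class indices: [batch_size, H, W], dtype int64

loss = criterion(output, target)
print(loss.item())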

Thanks for your reply!

Unfortunately, my target doesn’t contain the background class as a class index.
All of my targets consist of pixel value 0 (background) or another value (the desired target).

For example, one 3x3 target array looks like:

[0 1 0
 0 1 1
 0 0 1]

Stacking 180 slices with batch_size 4 gives target shape [4, 3, 3], i.e. [batch_size, height, width].

In this case, if I apply the transform you suggested without doing anything extra for the pixels with value 0, will it end up with two classes?

That would mean that the background class is class index 0.

In my example I created a target containing only two valid classes:

  • background with class index 0
  • not-background with class index 1

If your non-background class uses an index other than 1, you should also convert it to 1 for a binary classification use case.
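
One compact way to do that remapping (a sketch; 3 stands in for whatever index your organ actually uses):

import numpy as np

desired_class_index = 3  # hypothetical raw label of the organ
target = np.array([[0, 3, 0],
                   [0, 3, 3],
                   [0, 0, 3]])

binary_target = (target == desired_class_index).astype(np.int64)  # 0 = background, 1 = organ
print(binary_target)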

The class index issue was solved by your reply.
But I ran into another problem…

for x in [numpy_data, numpy_target]:
    print(x.min(), x.max())

When I loaded numpy files with shape (480, 512, 512),
I got input min: 0, max: 499
and target min: 0, max: 3.

I use a normalize function as follows:

def normalize(img):
    # scale values to [0, 1] by dividing by the global max (left unchanged if the array is all zeros)
    arr = img.copy().astype(np.float32)
    M = np.float32(np.max(img))
    if M != 0:
        arr *= 1. / M
    return arr

And I use the Dataset as follows:

class trainDataset(torch.utils.data.Dataset):
    def __init__(self, data, target, transform=None):
        self.data = data.astype(np.float32)
        self.data = normalize(self.data)

        self.target = target.astype(np.float32)
        self.target = normalize(self.target)

        self.transform = transform

    def __getitem__(self, index):
        x = self.data[index]
        y = self.target[index]

        if self.transform:
            x = self.transform(x)

        return x, y

    def __len__(self):
        return len(self.data)

transform = transforms.Compose([transforms.ToPILImage(mode=None),
                                transforms.Resize(512),
                                transforms.ToTensor()])

traindataset = trainDataset(train_numpy_data, train_numpy_target, transform=transform)
validdataset = trainDataset(valid_numpy_data, valid_numpy_target, transform=transform)

trainloader = torch.utils.data.DataLoader(traindataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=False)
validloader = torch.utils.data.DataLoader(validdataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=False)

inputs, masks = next(iter(trainloader))
print(inputs.shape, masks.shape)
print(inputs.min(), inputs.max())
print(masks.min(), masks.max())

I got:
torch.Size([4, 1, 512, 512]) torch.Size([4, 512, 512])
tensor(0.) tensor(0.7675)
tensor(0.) tensor(0.)

I don’t know why the min/max values don’t reach the 0-1 range…

I know it’s a hassle, but I’d really appreciate it if you could help me once more.
Thank you!

Using your code, I get normalized values in the range [0, 1]:

train_numpy_data = np.random.randint(0, 500, (480, 512, 512))
train_numpy_target = np.random.randint(0, 4, (480, 512, 512))

traindataset = trainDataset(train_numpy_data, train_numpy_target, transform=transform)
trainloader = torch.utils.data.DataLoader(traindataset, batch_size=4, shuffle=True, num_workers=0, pin_memory=False)

inputs, masks = next(iter(trainloader))
print(inputs.shape, masks.shape)
> torch.Size([4, 1, 512, 512]) torch.Size([4, 512, 512])
print(inputs.min(), inputs.max())
> tensor(0.) tensor(1.)
print(masks.min(), masks.max())
> tensor(0.) tensor(1.)

However, you are also normalizing the masks, which would be wrong for a classification use case.
If you would like to get rid of the unwanted classes, you should follow the code snippet given before:

background_class_index = 0
desired_class_index = 3

# zero out everything that is not the desired organ first, then map the organ to 1
train_numpy_target[train_numpy_target != desired_class_index] = background_class_index
train_numpy_target[train_numpy_target == desired_class_index] = 1
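
A quick sanity check after the remapping:

print(np.unique(train_numpy_target))  # should now print [0 1]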

Oh, the problem was the numpy dtype.
The type was int64; after changing it to int32, the data was normalized as you suggested.
Thank you!! :smiley: