Error in Python's multiprocessing library

@ptrblck I ran the dataset through your model and I am getting a CUDA error:

File "C:\Users\Neda\Anaconda3\lib\site-packages\torch\nn\modules\module.py", line 477, in __call__
    result = self.forward(*input, **kwargs)
File "U_Net_demo.py", line 110, in forward
    x = torch.cat((x, x_skip), dim=1)
RuntimeError: CUDA error: out of memory

Do you think it is because the images are large, or is there a memory leak in the code?

This is the code I tried:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.dataset import Dataset  # For custom data-sets
import torchvision.transforms as transforms
from PIL import Image
import glob
import numpy as np

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# get all the image and mask path and number of images
folder_data = glob.glob("D:\\Neda\\Pytorch\\U-net\\BMMCdata\\data\\*.tif")
folder_mask = glob.glob("D:\\Neda\\Pytorch\\U-net\\BMMCmasks\\masks\\*.tif")

# split these path using a certain percentage
len_data = len(folder_data)
print(len_data)
train_size = 0.6

train_image_paths = folder_data[:int(len_data*train_size)]
# print(train_image_paths) # output is 25 image for train
test_image_paths = folder_data[int(len_data*train_size):]
#print(test_image_paths) # output is 18 image for test

train_mask_paths = folder_mask[:int(len_data*train_size)]
test_mask_paths = folder_mask[int(len_data*train_size):]


class CustomDataset(Dataset):
    def __init__(self, image_paths, target_paths):   # initial logic happens here, e.g. setting up the transforms

        self.image_paths = image_paths
        self.target_paths = target_paths
        self.transforms = transforms.ToTensor()
        self.mapping = {
            85: 0,
            170: 1,
            255: 2
        }
    def mask_to_class(self, mask):
        for k in self.mapping:
            mask[mask==k] = self.mapping[k]
        return mask
    
    def __getitem__(self, index):

        image = Image.open(self.image_paths[index])
        mask = Image.open(self.target_paths[index])
        t_image = self.transforms(image)
        mask = torch.from_numpy(np.array(mask))
        mask = self.mask_to_class(mask)
        return t_image, mask

    def __len__(self):  # return the number of samples we have

        return len(self.image_paths)


train_dataset = CustomDataset(train_image_paths, train_mask_paths)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=2)

test_dataset = CustomDataset(test_image_paths, test_mask_paths)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=2)

#for data, target in train_loader:
    #print(torch.unique(target))
    
class BaseConv(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, padding, stride):
        super(BaseConv, self).__init__()
        self.act = nn.ReLU()
        # pass padding and stride as keyword arguments (nn.Conv2d takes stride before padding positionally)
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size, padding=padding, stride=stride)
        
        
    def forward(self, x):
        x = self.act(self.conv1(x))
        x = self.act(self.conv2(x))
        return x
    
    
class DownConv(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, padding, stride):
        super(DownConv, self).__init__()
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv_block = BaseConv(in_channels, out_channels, kernel_size, padding, stride)
    def forward(self, x):
        x = self.pool1(x)
        x = self.conv_block(x)
        return x


class UpConv(nn.Module):
    def __init__(self, in_channels, in_channels_skip, out_channels, kernel_size, padding, stride):
        super(UpConv, self).__init__()
        self.conv_trans1 = nn.ConvTranspose2d(in_channels, in_channels, kernel_size=2, padding=0, stride=2)
        self.conv_block = BaseConv(in_channels=in_channels + in_channels_skip, out_channels= out_channels, kernel_size=kernel_size, padding=padding, stride=stride)
        
    def forward(self, x, x_skip):
        x = self.conv_trans1(x)
        x = torch.cat((x, x_skip), dim=1)
        x = self.conv_block(x)
        return x


class UNet(nn.Module):
    def __init__(self, in_channels, out_channels, n_class, kernel_size, padding, stride):
        super(UNet, self).__init__()

        self.init_conv = BaseConv(in_channels, out_channels, kernel_size, padding, stride)

        self.down1 = DownConv(out_channels, 2 * out_channels, kernel_size, padding, stride)

        self.down2 = DownConv(2 * out_channels, 4 * out_channels, kernel_size, padding, stride)

        self.down3 = DownConv(4 * out_channels, 8 * out_channels, kernel_size, padding, stride)

        self.up3 = UpConv(8 * out_channels, 4 * out_channels, 4 * out_channels, kernel_size, padding, stride)

        self.up2 = UpConv(4 * out_channels, 2 * out_channels, 2 * out_channels, kernel_size, padding, stride)

        self.up1 = UpConv(2 * out_channels, out_channels, out_channels, kernel_size, padding, stride)

        self.out = nn.Conv2d(out_channels, n_class, kernel_size, padding=padding, stride=stride)

    def forward(self, x):
        # Encoder
        x = self.init_conv(x)
        x1 = self.down1(x)
        x2 = self.down2(x1)
        x3 = self.down3(x2)
        # Decoder
        x_up = self.up3(x3, x2)
        x_up = self.up2(x_up, x1)
        x_up = self.up1(x_up, x)
        x_out = F.log_softmax(self.out(x_up), 1)
        return x_out
    
    
model = UNet(in_channels=1,
             out_channels=64,
             n_class=3,
             kernel_size=3,
             padding=1,
             stride=1)

model = model.to(device)
    
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

#training loop
def main():
    for epoch in range(1):  # loop over the dataset multiple times

        for i, data in enumerate(train_loader, 0):
            # get the inputs
            t_image, mask = data
            t_image, mask = t_image.to(device), mask.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(t_image)
            loss = criterion(outputs, mask)
            loss.backward()
            optimizer.step()

            print('Epoch {}, Loss {}'.format(epoch, loss.item()))


if __name__ == '__main__':
    main()

Well, I hope there is no leak in my code.
Could you try to lower the number of out_channels and see if the model works?
Which CUDA and cuDNN version are you currently using? There might be some differences in memory allocation between different setups, I guess.
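For reference, a quick way to check those versions from within PyTorch (assuming a CUDA build is installed) is something like:

import torch

print(torch.__version__)                # PyTorch version
print(torch.version.cuda)               # CUDA version PyTorch was compiled with
print(torch.backends.cudnn.version())   # cuDNN version
print(torch.cuda.get_device_name(0))    # name of the current GPU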

@ptrblck I tried 15 and also 5 out_channels, and it causes RuntimeError: Expected object of type torch.cuda.LongTensor but found type torch.cuda.ByteTensor for argument #2 'target'.

It’s an NVIDIA Quadro M5000, and the nvcc version is release 9.2, V9.2.148.
What do you think is causing the error?

Try to convert your target to a torch.LongTensor by calling mask = mask.long().


@ptrblck thank you. Should I do this inside __getitem__?

Yeah, just put this line before the return statement.
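For example, __getitem__ from the dataset above would then look like this (only the mask = mask.long() line is new):

    def __getitem__(self, index):

        image = Image.open(self.image_paths[index])
        mask = Image.open(self.target_paths[index])
        t_image = self.transforms(image)
        mask = torch.from_numpy(np.array(mask))
        mask = self.mask_to_class(mask)
        mask = mask.long()  # NLLLoss expects the target as a LongTensor
        return t_image, mask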

Thank you @ptrblck. It works for any number of out_channels below 41; above that I get RuntimeError: CUDA error: out of memory. How can I solve this so I can run it with more than 41 out_channels?

Either reduce the input and mask size (and upsample the output later, if you need the predictions to be of the original size), or reduce the number of filters in some layers.
Currently the filters are increased and decreased by powers of 2. You could also let them increase/decrease linearly.
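As a rough sketch of the first option (the 512x512 size is just an example, and model, device and train_image_paths are the ones defined above), you could feed a downsampled image through the network and upsample the prediction back to the original resolution with F.interpolate:

import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image

image = Image.open(train_image_paths[0])           # one of the 1024x1024 .tif images
orig_w, orig_h = image.size
image = image.resize((512, 512), Image.BILINEAR)   # shrink to reduce the memory footprint

t_image = transforms.ToTensor()(image).unsqueeze(0).to(device)
output = model(t_image)                            # [1, n_class, 512, 512]
output = F.interpolate(output, size=(orig_h, orig_w),
                       mode='bilinear', align_corners=False)  # back to the original resolution

During training you would resize the mask in the same way, using nearest-neighbor interpolation so the class labels stay intact.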


@ptrblck I see. Thanks a lot. Yes, I will need the predictions to be the same size as the original input.


@ptrblck I am trying to test the model and visualize the results. I saw your example here. Could you please explain what the threshold = 0.5 in the visualization section does?

Also, I couldn’t understand this line: pred_grid = make_grid(pred.permute(1, 0, 2, 3), nrow=4). Thank you in advance.

The threshold was just used to get a binary prediction image. You could also try to normalize your prediction, e.g. by using softmax, and visualize the probability maps.

The permute call makes sure that each channel of the (single image batch) target is visualized as an individual image in a grid. This way you will see all class segmentation masks as separate images.
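As a tiny sketch of the threshold idea (assuming a single-channel output passed through a sigmoid):

import torch

output = torch.randn(1, 1, 24, 24)   # hypothetical single-channel logits
prob = torch.sigmoid(output)         # probabilities in [0, 1]
pred = (prob > 0.5).float()          # threshold = 0.5 yields the binary prediction image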


Thank you @ptrblck. Is there an example of visualizing the probability maps?

You could slice the output channels of your predictions and visualize each image separately, similarly to the target:

import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torchvision.utils import make_grid

batch_size = 1
channels = 10
h, w = 24, 24
output = torch.randn(batch_size, channels, h, w)  # random logits standing in for the model output
prob = F.softmax(output, 1)                       # per-class probabilities
prob_imgs = make_grid(prob.permute(1, 0, 2, 3))   # one grid cell per channel
plt.imshow(prob_imgs.permute(1, 2, 0))
plt.show()

I apologise for the repeated question. I did the same, and I am getting an error. I spent a few hours but still couldn’t figure out how to visualise the segmentation results. You are producing random numbers in output = torch.randn(batch_size, channels, h, w), and I replaced this with my output prediction, but I got this error:

output = outputs(batch_size, channels, h, w)
TypeError: 'Tensor' object is not callable

What do you think?

The error message indicates that outputs is a tensor, so you cannot call it like a function. You probably want to call the function that produces the output; for example, if model is your PyTorch model, then you can invoke the forward pass by calling model(data), which will generate the outputs of your model.
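In other words, reusing model and t_image from the training code above, a minimal sketch would be:

import torch.nn.functional as F

outputs = model(t_image)            # forward pass; 'outputs' is a tensor, not a callable
prob = F.softmax(outputs, dim=1)    # per-class probability maps to visualize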


@vmirly1 is completely right.
In addition to this, if you would like to visualize a prediction batch of more than one sample, you can use this code:

batch_size = 2
channels = 10
h, w = 24, 24
output = torch.randn(batch_size, channels, h, w)
prob = F.softmax(output, 1)
for p in prob:
    prob_imgs = make_grid(p.unsqueeze(1))
    plt.imshow(prob_imgs.permute(1, 2, 0))
    plt.show()

Again, I just initialized the output randomly as an example. You should use the output of your model.


Thank you both. Sorry to bother you again. I did change that, but another error was thrown:

return self.cpu().numpy()

RuntimeError: Can't call numpy() on Variable that requires grad. 
Use var.detach().numpy() instead.

So I added prob_imgs = prob_imgs.detach(), and now I am getting another dimension error: TypeError: Invalid dimensions for image data.

This is the whole training and testing process, and I am trying to visualise the probability maps. Possibly I am doing something wrong in the permute call; I am not sure where I am going wrong!

model = UNet(in_channels=1,
             out_channels=5,
             n_class=3,
             kernel_size=3,
             padding=1,
             stride=1)

model = model.to(device)
print("Model build, starting training")
    
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

#training loop
def main():
    
    for epoch in range(1):  # loop over the dataset multiple times

        for i, data in enumerate(train_loader, 0):
            # get the inputs
            t_image, mask = data
            t_image, mask = t_image.to(device), mask.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(t_image)
            loss = criterion(outputs, mask)
            loss.backward()
            optimizer.step()

            print('Epoch {}, Loss {}'.format(epoch, loss.item()))
    print("finished training")
  
    # visualize
    output = model(t_image)
    print(output.size()) # ([1, 3, 1024, 1024])
    prob = F.softmax(output, 1)
    print(prob.size()) # ([1, 3, 1024, 1024])
    prob_imgs = make_grid(prob.permute(1, 0, 2, 3))
    print(prob_imgs.size())  # [3, 1028, 3080])
    prob_imgs = prob_imgs.detach()
    plt.imshow(prob_imgs.permute(0, 1, 2))
    plt.show()

#    for p in prob:
#        prob_imgs = make_grid(p.unsqueeze(1))
#        #plt.imshow(prob_imgs.permute(1, 2, 0))
#        plt.show()
    
    
#################### testing
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data in (test_loader):
            
            t_image, mask = data
            t_image, mask = t_image.to(device), mask.to(device)
            outputs = model.forward(t_image) # forward pass only to get logits/output
            test_loss += criterion(outputs, mask).data[0] # sum up batch loss
            _, predicted = torch.max(outputs.data, 1)  # Find the class index with the maximum value.
            correct += predicted.eq(mask.data).sum()
            test_loss /= len(test_loader.dataset)
            
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
                        test_loss, correct, len(test_loader.dataset),
                    100. * correct / len(test_loader.dataset)))    

if __name__=='__main__':
  main()

This error message is related to reading images in the data-loader. Can you share how you defined your data loader?


It seems the permute call in plt.imshow should be prob_imgs.permute(1, 2, 0). Could you try that and see if it works?
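i.e. something like this (detaching and moving the tensor to the CPU first, since it is still on the GPU):

prob_imgs = prob_imgs.detach().cpu()    # detach from the graph and move off the GPU for plotting
plt.imshow(prob_imgs.permute(1, 2, 0))  # [C, H, W] -> [H, W, C], the layout matplotlib expects
plt.show()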


Thanks a lot. Yes, it works now :blush: