ValueError: Expected target size xxx, got torch.Size yyy

I am creating a network for deep learning classification.

When I ran my code on the GPU, it raised a size-mismatch error: expected target size (5, 50, 76, 80), got torch.Size([5]).

I am using MRI datasets (depth, height, width) with two labels, 0 and 1 (no masks).

Here's the code I used for creating the dataset:

import os

import nibabel
import numpy as np
from torch.utils.data import Dataset


class BrainS18Dataset(Dataset):

    def __init__(self, img_list, sets):
        with open(img_list, 'r') as f:
            self.img_list = [line.strip() for line in f]
            self.img_list.pop(0)  # drop the header row
        self.input_D = sets.input_D
        self.input_H = sets.input_H
        self.input_W = sets.input_W
        self.phase = sets.phase

    def __onehot__(self, label):
        # map the string label to a one-hot float vector
        if label == '1':
            label = np.array([0, 1], dtype='f')
        elif label == '0':
            label = np.array([1, 0], dtype='f')
        
        return label
    
    def __nii2tensorarray__(self, data):
        # add a channel dimension: (D, H, W) -> (1, D, H, W)
        [z, y, x] = data.shape
        new_data = np.reshape(data, [1, z, y, x])
        new_data = new_data.astype("float32")
            
        return new_data
    
    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):
        
        if self.phase == "train":
            # read image and labels
            ith_info = self.img_list[idx].split(",")

            
            img_name = ith_info[0]
            label = ith_info[1]
            img = nibabel.load(img_name)  # We have transposed the data from WHD format to DHW
            assert img is not None
            assert label is not None
            
            # data processing

            img_array = self.__training_data_process__(img)       
            label = self.__onehot__(label)
            
            # 2 tensor array
            img_array = self.__nii2tensorarray__(img_array)

            return img_array, label
        
        elif self.phase == "test":
            # read image
            ith_info = self.img_list[idx].split(",")
            
            img_name = ith_info[0]
            label = ith_info[1]
            

            img = nibabel.load(img_name)
            assert img is not None

            # data processing
            img_array = self.__training_data_process__(img)    

            # 2 tensor array
            img_array = self.__nii2tensorarray__(img_array)
            
            label = self.__onehot__(label)

            return img_array, label

    def __random_crop_to_same_size__(self, data):
        """
        Crop volumes of known input shapes to a common (200, 300, 320) size.
        Note: the crop offsets are fixed, not random; unrecognized shapes
        fall through to an all-zero volume.
        """
        [img_d, img_h, img_w] = data.shape
        data_processed = np.zeros((200, 300, 320))
        if [img_d, img_h, img_w] == [208, 300, 320]:
            data_processed = data[4:204, :, :]
        elif [img_d, img_h, img_w] == [226, 320, 320]:
            data_processed = data[13:213, 10:310, :]

        return data_processed

    def __training_data_process__(self, data): 

        data = data.get_fdata()
        data = self.__random_crop_to_same_size__(data)

        return data
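
For reference, __getitem__ assumes each line of img_list is a comma-separated path,label pair, with a header row that pop(0) discards. Something like this (the paths here are just placeholders):

path,label
/data/mri/subject_001.nii.gz,0
/data/mri/subject_002.nii.gz,1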

And the training code:

import os
import time

import numpy as np
import torch
import torch.nn as nn
from torch import optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
# parse_opts, generate_model and log come from elsewhere in the project


def train(data_loaders, model, optimizer, scheduler, total_epochs, save_interval, save_folder, sets):
    # settings
    batches_per_epoch = len(data_loaders['train'])
    log.info('{} epochs in total, {} batches per epoch'.format(total_epochs, batches_per_epoch))

    loss_f = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([1.4]))

    print("Current setting is:")
    print(sets)
    print("\n\n")     
    if not sets.no_cuda:
        loss_f = loss_f.cuda()
    
    losses = []
    loss_plot = []
    
    train_time_sp = time.time()
    for epoch in range(total_epochs):
        log.info('Start epoch {}'.format(epoch+1))
        
        val_losses = []
        
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
                
            elif phase == 'val':
                print('Starting Validation: ')
                model.eval()
            z=0
        
            for batch_id, batch_data in enumerate(data_loaders[phase]):
                # getting data batch 
                z+=1
                batch_id_sp = epoch * batches_per_epoch + batch_id
                volumes, labels = batch_data

                volumes = Variable(volumes.view(5,200,300,320))
                if not sets.no_cuda: 
                    volumes = volumes.cuda()

                if phase == 'train':
                    optimizer.zero_grad()

                output = model(volumes)

                labels = Variable(torch.argmax(labels, dim=1))
                if not sets.no_cuda:
                    labels = labels.cuda()
                print(output.shape)
                print(labels.shape)
        
                loss = loss_f(output, labels)
                
                if phase == 'train':
                    losses.append(loss.item())
                    loss.backward()
                    optimizer.step()
                    
                    avg_batch_time = (time.time() - train_time_sp) / (1 + batch_id_sp)
                    log.info(
                            '{} ||  Batch: {}-{} ({}), loss = {:.3f}, avg_batch_time = {:.3f}'\
                            .format(z, epoch, batch_id, batch_id_sp, loss.item(), avg_batch_time))

                    # save a checkpoint on the last batch of every epoch
                    if batch_id == batches_per_epoch - 1:
                        model_save_path = '{}/fdg_trail1_{}.pth.tar'.format(save_folder, epoch)
                        model_save_dir = os.path.dirname(model_save_path)
                        if not os.path.exists(model_save_dir):
                            os.makedirs(model_save_dir)

                        log.info('Save checkpoints: epoch = {}, batch_id = {}'.format(epoch, batch_id))
                        torch.save({
                                    'epoch': epoch,
                                    'batch_id': batch_id,
                                    'state_dict': model.state_dict(),
                                    'optimizer': optimizer.state_dict()},
                                    model_save_path)

                    loss_plot.append(np.mean(losses))
                    losses = []
                if phase == 'val':
                    val_losses.append(loss.item())
        val_loss = sum(val_losses) / len(val_losses)
        print('Epoch: {} || Validation Loss = {:.3f}'.format(epoch, val_loss))
            

    print('Finished training')      
                


if __name__ == '__main__':
    # setting
    sets = parse_opts()

    # getting model
    torch.manual_seed(sets.manual_seed)
    model, parameters = generate_model(sets)
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=5e-4)   
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
    
    # train from resume
    if sets.resume_path:
        if os.path.isfile(sets.resume_path):
            print("=> loading checkpoint '{}'".format(sets.resume_path))
            checkpoint = torch.load(sets.resume_path)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
              .format(sets.resume_path, checkpoint['epoch']))

    # getting data
    sets.phase = 'train'
    if sets.no_cuda:
        sets.pin_memory = False
    else:
        sets.pin_memory = True    
    training_dataset = BrainS18Dataset(sets.img_list, sets)
    validation_dataset = BrainS18Dataset(sets.val_list, sets)
    data_loader = DataLoader(training_dataset, batch_size=sets.batch_size, shuffle=True, num_workers=sets.num_workers, pin_memory=sets.pin_memory)     
    validation_loader = DataLoader(validation_dataset, batch_size=sets.batch_size, shuffle=True, num_workers=sets.num_workers, pin_memory=sets.pin_memory)    
    
    
    data_loaders = {"train": data_loader, "val": validation_loader}
    # training
    train(data_loaders, model, optimizer, scheduler, total_epochs=sets.n_epochs, save_interval=sets.save_intervals, save_folder=sets.save_folder, sets=sets) 

The error is raised by the loss function (BCEWithLogitsLoss).
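
As far as I understand, BCEWithLogitsLoss expects the target to have exactly the same shape as the input logits, so my (5, 50, 76, 80) output cannot match a torch.Size([5]) target. A minimal standalone sketch of the shape requirement:

import torch
import torch.nn as nn

loss_f = nn.BCEWithLogitsLoss()
logits = torch.randn(5, 2)              # (batch, num_classes)
targets = torch.tensor([[0., 1.]] * 5)  # one-hot targets, same shape as logits
print(loss_f(logits, targets))          # works: shapes match
# loss_f(logits, torch.zeros(5))        # raises: target shape (5,) != input shape (5, 2)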

Also, for 2D image datasets, output = output.view(output, -1) is normally used to flatten the tensor before passing it to this kind of loss function. What is the common method for 3D images (with depth)? Can I also use view in the same circumstances?

Thanks for the help!

Have you tried output = output.view(output.shape[0], -1)? It should work the same as for 2D images.
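
For illustration, using the shapes from your error message:

import torch

output = torch.randn(5, 50, 76, 80)      # shape taken from the error message
flat = output.view(output.shape[0], -1)  # keep the batch dimension, flatten the rest
print(flat.shape)                        # torch.Size([5, 304000])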

Thank you @Dwight_Foster. It works.

But now it raised another error:

Traceback (most recent call last):
  File "/home/clin15/Documents/MedicalNet-Tinnitus/train.py", line 194, in <module>
    train(data_loaders, model, optimizer, scheduler, total_epochs=sets.n_epochs, save_interval=sets.save_intervals, save_folder=sets.save_folder, sets=sets) 
  File "/home/clin15/Documents/MedicalNet-Tinnitus/train.py", line 77, in train
    output = model(volumes)
  File "/home/clin15/.conda/envs/pytorch_clin/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/clin15/.conda/envs/pytorch_clin/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py", line 161, in forward
    outputs = self.parallel_apply(replicas, inputs, kwargs)
  File "/home/clin15/.conda/envs/pytorch_clin/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py", line 171, in parallel_apply
    return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
  File "/home/clin15/.conda/envs/pytorch_clin/lib/python3.7/site-packages/torch/nn/parallel/parallel_apply.py", line 86, in parallel_apply
    output.reraise()
  File "/home/clin15/.conda/envs/pytorch_clin/lib/python3.7/site-packages/torch/_utils.py", line 428, in reraise
    raise self.exc_type(msg)
RuntimeError: Caught RuntimeError in replica 0 on device 0.
Original Traceback (most recent call last):
  File "/home/clin15/.conda/envs/pytorch_clin/lib/python3.7/site-packages/torch/nn/parallel/parallel_apply.py", line 61, in _worker
    output = module(*input, **kwargs)
  File "/home/clin15/.conda/envs/pytorch_clin/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/clin15/Documents/MedicalNet-Tinnitus/models/resnet.py", line 210, in forward
    x = self.conv1(x)
  File "/home/clin15/.conda/envs/pytorch_clin/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/clin15/.conda/envs/pytorch_clin/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 573, in forward
    self.padding, self.dilation, self.groups)
RuntimeError: Expected 5-dimensional input for 5-dimensional weight [64, 1, 7, 7, 7], but got 4-dimensional input of size [2, 200, 300, 320] instead

Do you by any chance know how to solve this?

Thanks.

That is because in your training loop you reshape the volumes variable here:

                volumes, labels = batch_data

                volumes = Variable(volumes.view(5,200,300,320))
                if not sets.no_cuda: 
                    volumes = volumes.cuda()

That reshape makes the input a 4-dimensional tensor, while Conv3d needs 5-dimensional input of shape (batch, channels, depth, height, width). It seems the channel dimension is missing. If the images are grayscale, you should just be able to add a 1 between the 5 and the 200; if they are RGB, then that is a bigger problem.
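
For example, a minimal sketch of that fix, assuming the volumes are single-channel (grayscale):

volumes = Variable(volumes.view(5, 1, 200, 300, 320))  # (N, C, D, H, W)
# or, without hardcoding the shape:
volumes = volumes.unsqueeze(1)  # (N, D, H, W) -> (N, 1, D, H, W)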