RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 3, 7, 7], but got 3-dimensional input of size [8, 600, 800] instead

In this code:

network = Network()
network.cuda()    

criterion = nn.MSELoss()
optimizer = optim.Adam(network.parameters(), lr=0.0001)

loss_min = np.inf
num_epochs = 1

start_time = time.time()
for epoch in range(1,num_epochs+1):
    
    loss_train = 0
    loss_test = 0
    running_loss = 0
    
    
    network.train()
    print('size of train loader is: ', len(train_loader))

    for step in range(1, len(train_loader)+1):

        
        # note: next(iter(train_loader)) builds a brand-new iterator every step
        # instead of advancing a single one across the epoch
        batch = next(iter(train_loader))
        images, landmarks = batch['image'], batch['landmarks']
        print(images.shape)
       
        images = images.unsqueeze_(1)  # [8, 600, 800] -> [8, 1, 600, 800]

        images = torch.cat((images,images,images),1)  # -> [8, 3, 600, 800]
        images = images.cuda()
    
        landmarks = landmarks.view(landmarks.size(0),-1).cuda() 
        norm_image = transforms.Normalize(0.3812, 0.1123) 
        for image in images:
            image = image.float()
            ##image = to_tensor(image) #TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
            image = norm_image(image)  # note: this only rebinds the loop variable; the tensors inside images are left unchanged
        
        ###removing landmarks normalize because of the following error
        ###ValueError: Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = torch.Size([8, 8])
       
         
        # even indices are x (divide by the 800-pixel width), odd indices are y (divide by the 600-pixel height)
        for i in range(8):
            if(i%2==0):
                landmarks[:,i] = landmarks[:,i]/800
            else:
                landmarks[:,i] = landmarks[:,i]/600
                
        print(landmarks.shape)
        print(landmarks)
        
        

        
        norm_landmarks = transforms.Normalize(0.4949, 0.2165)
        landmarks[landmarks != landmarks] = 0  # NaN != NaN, so this zeroes out any NaNs
        landmarks = landmarks.unsqueeze_(0)    # [8, 8] -> [1, 8, 8] so Normalize sees a (C, H, W) tensor
        landmarks = norm_landmarks(landmarks)
        
        predictions = network(images)
        
        # clear all the gradients before calculating them
        optimizer.zero_grad()
        
        print('predictions are: ', predictions.float())
        print('landmarks are: ', landmarks.float())
        # find the loss for the current step
        loss_train_step = criterion(predictions.float(), landmarks.float())
        
        
        loss_train_step = loss_train_step.to(torch.float32)
        print("loss_train_step before backward: ", loss_train_step)
        
        # calculate the gradients
        loss_train_step.backward()
        
        # update the parameters
        optimizer.step()
        
        print("loss_train_step after backward: ", loss_train_step)

        
        loss_train += loss_train_step.item()
        
        print("loss_train: ", loss_train)
        running_loss = loss_train/step
        print('step: ', step)
        print('running loss: ', running_loss)
        
        print_overwrite(step, len(train_loader), running_loss, 'train')
        
    network.eval() 
    with torch.no_grad():
        
        for step in range(1,len(test_loader)+1):
            
            batch = next(iter(train_loader))  # note: this samples from train_loader, not test_loader
            images, landmarks = batch['image'], batch['landmarks']
            images = images.cuda()  # still [8, 600, 800]; unlike the training loop, no channel dimension is added here
            landmarks = landmarks.view(landmarks.size(0),-1).cuda()
        
            predictions = network(images)

            # find the loss for the current step
            loss_test_step = criterion(predictions, landmarks)

            loss_test += loss_test_step.item()
            running_loss = loss_test/step

            print_overwrite(step, len(test_loader), running_loss, 'Validation')
    
    loss_train /= len(train_loader)
    loss_test /= len(test_loader)
    
    print('\n--------------------------------------------------')
    print('Epoch: {}  Train Loss: {:.4f} Valid Loss: {:.4f}'.format(epoch, loss_train, loss_test))
    print('--------------------------------------------------')
    
    if loss_test < loss_min:
        loss_min = loss_test
        torch.save(network.state_dict(), '../moth_landmarks.pth') 
        print("\nMinimum Valid Loss of {:.4f} at epoch {}/{}".format(loss_min, epoch, num_epochs))
        print('Model Saved\n')
     
print('Training Complete')
print("Total Elapsed Time : {} s".format(time.time()-start_time))

I get an output like this (only 1 epoch and batch size 8):

predictions are:  tensor([[ 0.7045, -0.3278,  1.5776, -0.4021, -0.4360, -0.3449,  0.3101,  0.1729],
        [-0.1054, -0.8457,  1.1455, -1.4383, -0.2255, -1.5432,  0.4840,  0.2633],
        [ 0.6063, -0.3380,  1.5276, -0.1688, -0.4002, -0.4386,  0.2235,  0.2763],
        [ 0.7645, -0.3076,  1.5969, -0.0705, -0.4203, -0.1109,  0.3278,  0.2545],
        [ 0.0870, -0.7487,  1.3689, -0.9824, -0.5139, -1.0490,  0.1499,  0.2277],
        [ 0.3001, -0.5763,  1.4895, -0.5979, -0.5189, -0.7502,  0.1075,  0.1689],
        [ 0.5470, -0.4144,  1.4193, -0.7669, -0.3635, -0.8172,  0.4247,  0.2369],
        [ 0.5765, -0.4259,  1.7669, -0.2599, -0.3736, -0.4769,  0.4189,  0.1796]],
       device='cuda:0', grad_fn=<AddmmBackward>)
landmarks are:  tensor([[[ 0.5227, -0.4615,  1.6575, -0.1304, -0.5076, -0.0149,  0.1815,
           0.0021],
         [ 0.6125, -0.4273,  1.2807, -1.3253, -0.2574, -1.2542,  0.6864,
           0.1575],
         [ 0.5452, -0.4067,  1.7557,  0.0543, -0.4961, -0.3306,  0.1323,
           0.4306],
         [ 0.5908, -0.4366,  1.7557,  0.1390, -0.5192,  0.1313,  0.6529,
           0.0236],
         [ 0.5366, -0.4232,  1.5478, -0.7771, -0.6289, -0.7463,  0.2288,
           0.3177],
         [ 0.5598, -0.4129,  1.7210, -0.4999, -0.5711, -0.4229,  0.1136,
           0.0983],
         [ 0.5255, -0.4495,  1.5651, -0.4999, -0.5711, -0.8463,  0.4566,
           0.1621],
         [ 0.6070, -0.4085,  1.8885, -0.2921, -0.6289, -0.1843,  0.6356,
           0.1390]]], device='cuda:0')
loss_train_step before backward:  tensor(0.0436, device='cuda:0', grad_fn=<MseLossBackward>)
loss_train_step after backward:  tensor(0.0436, device='cuda:0', grad_fn=<MseLossBackward>)
loss_train:  12.310782719403505
step:  90
running loss:  0.13678647466003896
Train Steps: 90/90  Loss: 0.1368 

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-31-c95badccd7c5> in <module>
    101             landmarks = landmarks.view(landmarks.size(0),-1).cuda()
    102 
--> 103             predictions = network(images)
    104 
    105             # find the loss for the current step

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

<ipython-input-10-46116d2a7101> in forward(self, x)
     10     def forward(self, x):
     11         x = x.float()
---> 12         out = self.model(x)
     13         return out

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

~/anaconda3/lib/python3.7/site-packages/torchvision/models/resnet.py in forward(self, x)
    218 
    219     def forward(self, x):
--> 220         return self._forward_impl(x)
    221 
    222 

~/anaconda3/lib/python3.7/site-packages/torchvision/models/resnet.py in _forward_impl(self, x)
    201     def _forward_impl(self, x):
    202         # See note [TorchScript super()]
--> 203         x = self.conv1(x)
    204         x = self.bn1(x)
    205         x = self.relu(x)

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
    417 
    418     def forward(self, input: Tensor) -> Tensor:
--> 419         return self._conv_forward(input, self.weight)
    420 
    421 class Conv3d(_ConvNd):

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight)
    414                             _pair(0), self.dilation, self.groups)
    415         return F.conv2d(input, weight, self.bias, self.stride,
--> 416                         self.padding, self.dilation, self.groups)
    417 
    418     def forward(self, input: Tensor) -> Tensor:

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 3, 7, 7], but got 3-dimensional input of size [8, 600, 800] instead

Should I do the same kind of fix here too, like the one for RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[1, 8, 600, 800] to have 3 channels, but got 8 channels instead?

Also, could you please answer this?
If my landmarks are four (x, y) values per image (four 2D keypoints), should Normalize take two values for the mean or just one? And if it needs two, how is it even working right now with only one value for the mean? Sorry for the existential question!
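For what it’s worth, my understanding (an assumption on my part, not something confirmed in this thread) is that a scalar mean/std is simply broadcast over every element, which is why a single value “works” at all; separate x/y statistics would have to be applied manually, roughly like this sketch with made-up numbers:

import torch

landmarks = torch.rand(8, 8)  # dummy batch: four (x, y) pairs per image, flattened

# a scalar mean/std is broadcast over every element, so
# transforms.Normalize(0.4949, 0.2165) behaves like this one expression:
normalized = (landmarks - 0.4949) / 0.2165

# separate x/y statistics (hypothetical values) applied per column instead:
x_mean, x_std = 0.49, 0.22  # hypothetical
y_mean, y_std = 0.50, 0.21  # hypothetical
normalized = landmarks.clone()
normalized[:, 0::2] = (landmarks[:, 0::2] - x_mean) / x_std  # x coordinates
normalized[:, 1::2] = (landmarks[:, 1::2] - y_mean) / y_std  # y coordinates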

Yes! Please try reshaping it to [8,3,600,800]

You don’t need to use transforms.Normalize; you can just divide the x values by the width and the y values by the height.

I am not sure which method to use, but do you mean something like this?
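A minimal sketch of what I have in mind, assuming even indices are x, odd indices are y, and the images are 800 pixels wide by 600 high:

import torch

landmarks = torch.rand(8, 8) * 600  # dummy batch: four (x, y) pairs per image

landmarks[:, 0::2] /= 800.0  # x coordinates divided by the width
landmarks[:, 1::2] /= 600.0  # y coordinates divided by the height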

Apologies for the mistake. It’s not reshape (a [8, 600, 800] tensor has only a third as many elements as [8, 3, 600, 800], so reshape can’t get you there)… please do a torch.cat as shown in this link.
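For completeness, a minimal sketch of that fix applied to the validation loop (it just mirrors the unsqueeze + torch.cat the training loop already does; the dummy tensor stands in for the [8, 600, 800] grayscale batch from the error message):

import torch

images = torch.rand(8, 600, 800)                 # grayscale batch, as in the error message

images = images.unsqueeze(1)                     # [8, 600, 800] -> [8, 1, 600, 800]
images = torch.cat((images, images, images), 1)  # -> [8, 3, 600, 800]; ResNet’s conv1 expects 3 input channels

That is the same channel expansion as in the training loop, done before calling network(images) in the eval loop.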