RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 600, 800, 3] to have 3 channels, but got 600 channels instead

Could you please walk me through how to fix this?

I also don’t know how to use PyTorch to find the mean and std and add it in the Normalize as args.
The error is:

size of train loader is:  12
images shape:  torch.Size([64, 600, 800, 3])
landmarks shape:  torch.Size([64, 8])

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-65-0706303f707b> in <module>
     38         ##landmarks = torchvision.transforms.Normalize(landmarks)
     39 
---> 40         predictions = network(images)
     41 
     42         # clear all the gradients before calculating them

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

<ipython-input-54-46116d2a7101> in forward(self, x)
     10     def forward(self, x):
     11         x = x.float()
---> 12         out = self.model(x)
     13         return out

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

~/anaconda3/lib/python3.7/site-packages/torchvision/models/resnet.py in forward(self, x)
    218 
    219     def forward(self, x):
--> 220         return self._forward_impl(x)
    221 
    222 

~/anaconda3/lib/python3.7/site-packages/torchvision/models/resnet.py in _forward_impl(self, x)
    201     def _forward_impl(self, x):
    202         # See note [TorchScript super()]
--> 203         x = self.conv1(x)
    204         x = self.bn1(x)
    205         x = self.relu(x)

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
    417 
    418     def forward(self, input: Tensor) -> Tensor:
--> 419         return self._conv_forward(input, self.weight)
    420 
    421 class Conv3d(_ConvNd):

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight)
    414                             _pair(0), self.dilation, self.groups)
    415         return F.conv2d(input, weight, self.bias, self.stride,
--> 416                         self.padding, self.dilation, self.groups)
    417 
    418     def forward(self, input: Tensor) -> Tensor:

RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 600, 800, 3] to have 3 channels, but got 600 channels instead

Here is one of the frames from my dataset:


$ identify frame773.png
frame773.png PNG 800x600 800x600+0+0 8-bit sRGB 464145B 0.000u 0:00.000
#torch.autograd.set_detect_anomaly(True)
network = Network()
network.cuda()    

criterion = nn.MSELoss()
optimizer = optim.Adam(network.parameters(), lr=0.0001)

loss_min = np.inf
num_epochs = 10

start_time = time.time()
for epoch in range(1,num_epochs+1):
    
    loss_train = 0
    loss_test = 0
    running_loss = 0
    
    
    network.train()
    print('size of train loader is: ', len(train_loader))

    for step in range(1,len(train_loader)+1):
    
        ##images, landmarks = next(iter(train_loader))
        ##print(type(images))
        
        batch = next(iter(train_loader))
        images, landmarks = batch['image'], batch['landmarks']
        
        images = images.cuda()
    
        landmarks = landmarks.view(landmarks.size(0),-1).cuda() 
        
        print('images shape: ', images.shape)
        print('landmarks shape: ', landmarks.shape)
        
        ##images = torchvision.transforms.Normalize(images)
        ##landmarks = torchvision.transforms.Normalize(landmarks)
        
        predictions = network(images)
        
        # clear all the gradients before calculating them
        optimizer.zero_grad()
        
        # find the loss for the current step
        loss_train_step = criterion(predictions.float(), landmarks.float())
        
        print("type(loss_train_step) is: ", type(loss_train_step))
        
        print("loss_train_step.dtype is: ",loss_train_step.dtype)
        
        ##loss_train_step = loss_train_step.to(torch.float32)
        
        # calculate the gradients
        loss_train_step.backward()
        
        # update the parameters
        optimizer.step()
        
        loss_train += loss_train_step.item()
        running_loss = loss_train/step
        
        print_overwrite(step, len(train_loader), running_loss, 'train')
        
    network.eval() 
    with torch.no_grad():
        
        for step in range(1,len(test_loader)+1):
            
            batch = next(iter(train_loader))
            images, landmarks = batch['image'], batch['landmarks']        
            images = images.cuda()
            landmarks = landmarks.view(landmarks.size(0),-1).cuda()
        
            predictions = network(images)

            # find the loss for the current step
            loss_test_step = criterion(predictions, landmarks)

            loss_test += loss_test_step.item()
            running_loss = loss_test/step

            print_overwrite(step, len(test_loader), running_loss, 'Testing')
    
    loss_train /= len(train_loader)
    loss_test /= len(test_loader)
    
    print('\n--------------------------------------------------')
    print('Epoch: {}  Train Loss: {:.4f}  Test Loss: {:.4f}'.format(epoch, loss_train, loss_test))
    print('--------------------------------------------------')
    
    if loss_test < loss_min:
        loss_min = loss_test
        torch.save(network.state_dict(), '../moth_landmarks.pth') 
        print("\nMinimum Test Loss of {:.4f} at epoch {}/{}".format(loss_min, epoch, num_epochs))
        print('Model Saved\n')
     
print('Training Complete')
print("Total Elapsed Time : {} s".format(time.time()-start_time))

Or should I add the Normalized here in the transformed_dataset?

transformed_dataset = MothLandmarksDataset(csv_file='moth_gt.csv',
                                           root_dir='.',
                                           transform=transforms.Compose([
                                               Rescale(256),
                                               RandomCrop(224),
                                               ToTensor()
                                           ]))

for i in range(len(transformed_dataset)):
    sample = transformed_dataset[i]

    print(i, sample['image'].size(), sample['landmarks'].size())

    if i == 3:
        break

My dataset contains 800 PNG images each with annotation for four landmarks.

You are passing the input images as “channels-last” (NHWC), while PyTorch expects “channels-first” (NCHW).
permute the input batch such that the channels are in dim1.

1 Like

do you mean something like?

        #RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 600, 800, 3] to have 3 channels, but got 600 channels instead
    
        images = images.permute(3,1)

I understand the following example but not sure how to apply to my situation above:

Thank you so much. This worked:

test_images_permute = images.permute(0, 3, 1, 2)
print(test_images_permute.shape)