Could you please walk me through how to fix this?
I also don’t know how to use PyTorch to find the mean and std and add it in the Normalize as args.
The error is:
size of train loader is: 12
images shape: torch.Size([64, 600, 800, 3])
landmarks shape: torch.Size([64, 8])
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-65-0706303f707b> in <module>
38 ##landmarks = torchvision.transforms.Normalize(landmarks)
39
---> 40 predictions = network(images)
41
42 # clear all the gradients before calculating them
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
<ipython-input-54-46116d2a7101> in forward(self, x)
10 def forward(self, x):
11 x = x.float()
---> 12 out = self.model(x)
13 return out
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
~/anaconda3/lib/python3.7/site-packages/torchvision/models/resnet.py in forward(self, x)
218
219 def forward(self, x):
--> 220 return self._forward_impl(x)
221
222
~/anaconda3/lib/python3.7/site-packages/torchvision/models/resnet.py in _forward_impl(self, x)
201 def _forward_impl(self, x):
202 # See note [TorchScript super()]
--> 203 x = self.conv1(x)
204 x = self.bn1(x)
205 x = self.relu(x)
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
417
418 def forward(self, input: Tensor) -> Tensor:
--> 419 return self._conv_forward(input, self.weight)
420
421 class Conv3d(_ConvNd):
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight)
414 _pair(0), self.dilation, self.groups)
415 return F.conv2d(input, weight, self.bias, self.stride,
--> 416 self.padding, self.dilation, self.groups)
417
418 def forward(self, input: Tensor) -> Tensor:
RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 600, 800, 3] to have 3 channels, but got 600 channels instead
Here is one of the frames from my dataset:
$ identify frame773.png
frame773.png PNG 800x600 800x600+0+0 8-bit sRGB 464145B 0.000u 0:00.000
#torch.autograd.set_detect_anomaly(True)
network = Network()
network.cuda()
criterion = nn.MSELoss()
optimizer = optim.Adam(network.parameters(), lr=0.0001)
loss_min = np.inf
num_epochs = 10
start_time = time.time()
for epoch in range(1,num_epochs+1):
loss_train = 0
loss_test = 0
running_loss = 0
network.train()
print('size of train loader is: ', len(train_loader))
for step in range(1,len(train_loader)+1):
##images, landmarks = next(iter(train_loader))
##print(type(images))
batch = next(iter(train_loader))
images, landmarks = batch['image'], batch['landmarks']
images = images.cuda()
landmarks = landmarks.view(landmarks.size(0),-1).cuda()
print('images shape: ', images.shape)
print('landmarks shape: ', landmarks.shape)
##images = torchvision.transforms.Normalize(images)
##landmarks = torchvision.transforms.Normalize(landmarks)
predictions = network(images)
# clear all the gradients before calculating them
optimizer.zero_grad()
# find the loss for the current step
loss_train_step = criterion(predictions.float(), landmarks.float())
print("type(loss_train_step) is: ", type(loss_train_step))
print("loss_train_step.dtype is: ",loss_train_step.dtype)
##loss_train_step = loss_train_step.to(torch.float32)
# calculate the gradients
loss_train_step.backward()
# update the parameters
optimizer.step()
loss_train += loss_train_step.item()
running_loss = loss_train/step
print_overwrite(step, len(train_loader), running_loss, 'train')
network.eval()
with torch.no_grad():
for step in range(1,len(test_loader)+1):
batch = next(iter(train_loader))
images, landmarks = batch['image'], batch['landmarks']
images = images.cuda()
landmarks = landmarks.view(landmarks.size(0),-1).cuda()
predictions = network(images)
# find the loss for the current step
loss_test_step = criterion(predictions, landmarks)
loss_test += loss_test_step.item()
running_loss = loss_test/step
print_overwrite(step, len(test_loader), running_loss, 'Testing')
loss_train /= len(train_loader)
loss_test /= len(test_loader)
print('\n--------------------------------------------------')
print('Epoch: {} Train Loss: {:.4f} Test Loss: {:.4f}'.format(epoch, loss_train, loss_test))
print('--------------------------------------------------')
if loss_test < loss_min:
loss_min = loss_test
torch.save(network.state_dict(), '../moth_landmarks.pth')
print("\nMinimum Test Loss of {:.4f} at epoch {}/{}".format(loss_min, epoch, num_epochs))
print('Model Saved\n')
print('Training Complete')
print("Total Elapsed Time : {} s".format(time.time()-start_time))
Or should I add the Normalized here in the transformed_dataset?
transformed_dataset = MothLandmarksDataset(csv_file='moth_gt.csv',
root_dir='.',
transform=transforms.Compose([
Rescale(256),
RandomCrop(224),
ToTensor()
]))
for i in range(len(transformed_dataset)):
sample = transformed_dataset[i]
print(i, sample['image'].size(), sample['landmarks'].size())
if i == 3:
break
My dataset contains 800 PNG images each with annotation for four landmarks.