In this code:
network = Network()
network.cuda()
criterion = nn.MSELoss()
optimizer = optim.Adam(network.parameters(), lr=0.0001)
loss_min = np.inf
num_epochs = 1
start_time = time.time()
for epoch in range(1,num_epochs+1):
loss_train = 0
loss_test = 0
running_loss = 0
network.train()
print('size of train loader is: ', len(train_loader))
for step in range(1, len(train_loader)+1):
batch = next(iter(train_loader))
images, landmarks = batch['image'], batch['landmarks']
print(images.shape)
images = images.unsqueeze_(1)
images = torch.cat((images,images,images),1)
images = images.cuda()
landmarks = landmarks.view(landmarks.size(0),-1).cuda()
norm_image = transforms.Normalize(0.3812, 0.1123)
for image in images:
image = image.float()
##image = to_tensor(image) #TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
image = norm_image(image)
###removing landmarks normalize because of the following error
###ValueError: Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = torch.Size([8, 8])
for i in range(8):
if(i%2==0):
landmarks[:,i] = landmarks[:,i]/800
else:
landmarks[:,i] = landmarks[:,i]/600
print(landmarks.shape)
print(landmarks)
norm_landmarks = transforms.Normalize(0.4949, 0.2165)
landmarks [landmarks != landmarks] = 0
landmarks = landmarks.unsqueeze_(0)
landmarks = norm_landmarks(landmarks)
predictions = network(images)
# clear all the gradients before calculating them
optimizer.zero_grad()
print('predictions are: ', predictions.float())
print('landmarks are: ', landmarks.float())
# find the loss for the current step
loss_train_step = criterion(predictions.float(), landmarks.float())
loss_train_step = loss_train_step.to(torch.float32)
print("loss_train_step before backward: ", loss_train_step)
# calculate the gradients
loss_train_step.backward()
# update the parameters
optimizer.step()
print("loss_train_step after backward: ", loss_train_step)
loss_train += loss_train_step.item()
print("loss_train: ", loss_train)
running_loss = loss_train/step
print('step: ', step)
print('running loss: ', running_loss)
print_overwrite(step, len(train_loader), running_loss, 'train')
network.eval()
with torch.no_grad():
for step in range(1,len(test_loader)+1):
batch = next(iter(train_loader))
images, landmarks = batch['image'], batch['landmarks']
images = images.cuda()
landmarks = landmarks.view(landmarks.size(0),-1).cuda()
predictions = network(images)
# find the loss for the current step
loss_test_step = criterion(predictions, landmarks)
loss_test += loss_test_step.item()
running_loss = loss_test/step
print_overwrite(step, len(test_loader), running_loss, 'Validation')
loss_train /= len(train_loader)
loss_test /= len(test_loader)
print('\n--------------------------------------------------')
print('Epoch: {} Train Loss: {:.4f} Valid Loss: {:.4f}'.format(epoch, loss_train, loss_test))
print('--------------------------------------------------')
if loss_test < loss_min:
loss_min = loss_test
torch.save(network.state_dict(), '../moth_landmarks.pth')
print("\nMinimum Valid Loss of {:.4f} at epoch {}/{}".format(loss_min, epoch, num_epochs))
print('Model Saved\n')
print('Training Complete')
print("Total Elapsed Time : {} s".format(time.time()-start_time))
I get an output like this (only 1 epoch and batch size 8):
predictions are: tensor([[ 0.7045, -0.3278, 1.5776, -0.4021, -0.4360, -0.3449, 0.3101, 0.1729],
[-0.1054, -0.8457, 1.1455, -1.4383, -0.2255, -1.5432, 0.4840, 0.2633],
[ 0.6063, -0.3380, 1.5276, -0.1688, -0.4002, -0.4386, 0.2235, 0.2763],
[ 0.7645, -0.3076, 1.5969, -0.0705, -0.4203, -0.1109, 0.3278, 0.2545],
[ 0.0870, -0.7487, 1.3689, -0.9824, -0.5139, -1.0490, 0.1499, 0.2277],
[ 0.3001, -0.5763, 1.4895, -0.5979, -0.5189, -0.7502, 0.1075, 0.1689],
[ 0.5470, -0.4144, 1.4193, -0.7669, -0.3635, -0.8172, 0.4247, 0.2369],
[ 0.5765, -0.4259, 1.7669, -0.2599, -0.3736, -0.4769, 0.4189, 0.1796]],
device='cuda:0', grad_fn=<AddmmBackward>)
landmarks are: tensor([[[ 0.5227, -0.4615, 1.6575, -0.1304, -0.5076, -0.0149, 0.1815,
0.0021],
[ 0.6125, -0.4273, 1.2807, -1.3253, -0.2574, -1.2542, 0.6864,
0.1575],
[ 0.5452, -0.4067, 1.7557, 0.0543, -0.4961, -0.3306, 0.1323,
0.4306],
[ 0.5908, -0.4366, 1.7557, 0.1390, -0.5192, 0.1313, 0.6529,
0.0236],
[ 0.5366, -0.4232, 1.5478, -0.7771, -0.6289, -0.7463, 0.2288,
0.3177],
[ 0.5598, -0.4129, 1.7210, -0.4999, -0.5711, -0.4229, 0.1136,
0.0983],
[ 0.5255, -0.4495, 1.5651, -0.4999, -0.5711, -0.8463, 0.4566,
0.1621],
[ 0.6070, -0.4085, 1.8885, -0.2921, -0.6289, -0.1843, 0.6356,
0.1390]]], device='cuda:0')
loss_train_step before backward: tensor(0.0436, device='cuda:0', grad_fn=<MseLossBackward>)
loss_train_step after backward: tensor(0.0436, device='cuda:0', grad_fn=<MseLossBackward>)
loss_train: 12.310782719403505
step: 90
running loss: 0.13678647466003896
Train Steps: 90/90 Loss: 0.1368
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-31-c95badccd7c5> in <module>
101 landmarks = landmarks.view(landmarks.size(0),-1).cuda()
102
--> 103 predictions = network(images)
104
105 # find the loss for the current step
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
<ipython-input-10-46116d2a7101> in forward(self, x)
10 def forward(self, x):
11 x = x.float()
---> 12 out = self.model(x)
13 return out
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
~/anaconda3/lib/python3.7/site-packages/torchvision/models/resnet.py in forward(self, x)
218
219 def forward(self, x):
--> 220 return self._forward_impl(x)
221
222
~/anaconda3/lib/python3.7/site-packages/torchvision/models/resnet.py in _forward_impl(self, x)
201 def _forward_impl(self, x):
202 # See note [TorchScript super()]
--> 203 x = self.conv1(x)
204 x = self.bn1(x)
205 x = self.relu(x)
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
417
418 def forward(self, input: Tensor) -> Tensor:
--> 419 return self._conv_forward(input, self.weight)
420
421 class Conv3d(_ConvNd):
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight)
414 _pair(0), self.dilation, self.groups)
415 return F.conv2d(input, weight, self.bias, self.stride,
--> 416 self.padding, self.dilation, self.groups)
417
418 def forward(self, input: Tensor) -> Tensor:
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 3, 7, 7], but got 3-dimensional input of size [8, 600, 800] instead
1
start_time = time.time()
2
Should I do something like RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[1, 8, 600, 800] to have 3 channels, but got 8 channels instead here too?