I want to create and train an autoencoder to extract features and then use those features for clustering algorithms. Right now I am getting an error while calculating the loss:
RuntimeError: The size of tensor a (224) must match the size of tensor b (244) at non-singleton dimension 3
and a warning
UserWarning: Using a target size (torch.Size([1, 3, 224, 244])) that is different to the input size (torch.Size([1, 3, 224, 224])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
Can anyone tell me what is wrong here? To me the input and output sizes in the warning and error look the same, but it says they are different.
The model summary and the printed input/output image shapes are as follows:
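For reference, here is a minimal sketch (random tensors, not my real data) that reproduces the same warning and error from nn.MSELoss when the two shapes differ:

import torch
import torch.nn as nn

criterion = nn.MSELoss()

output = torch.randn(1, 3, 224, 224)  # same shape as my model output
target = torch.randn(1, 3, 224, 244)  # same shape as my transformed image
loss = criterion(output, target)      # UserWarning about the target size, then the RuntimeError above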
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 16, 112, 112] 448
ReLU-2 [-1, 16, 112, 112] 0
Conv2d-3 [-1, 32, 56, 56] 4,640
ReLU-4 [-1, 32, 56, 56] 0
Conv2d-5 [-1, 64, 18, 18] 100,416
ReLU-6 [-1, 64, 18, 18] 0
Conv2d-7 [-1, 128, 3, 3] 401,536
ReLU-8 [-1, 128, 3, 3] 0
Conv2d-9 [-1, 256, 1, 1] 295,168
ConvTranspose2d-10 [-1, 128, 3, 3] 295,040
ReLU-11 [-1, 128, 3, 3] 0
ConvTranspose2d-12 [-1, 64, 12, 12] 401,472
ReLU-13 [-1, 64, 12, 12] 0
ConvTranspose2d-14 [-1, 24, 28, 28] 75,288
ReLU-15 [-1, 24, 28, 28] 0
ConvTranspose2d-16 [-1, 16, 56, 56] 3,472
ReLU-17 [-1, 16, 56, 56] 0
ConvTranspose2d-18 [-1, 8, 111, 111] 1,160
ReLU-19 [-1, 8, 111, 111] 0
ConvTranspose2d-20 [-1, 3, 224, 224] 603
Sigmoid-21 [-1, 3, 224, 224] 0
================================================================
Total params: 1,579,243
Trainable params: 1,579,243
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 9.94
Params size (MB): 6.02
Estimated Total Size (MB): 16.54
----------------------------------------------------------------
Min Value of input Image = tensor(0.0627)
Max Value of input Image = tensor(0.5098)
Input Image shape = torch.Size([1, 3, 224, 244])
Output Image shape = torch.Size([1, 3, 224, 224])
My autoencoder class is:
class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, 3, stride=2, padding=1),    # b, 16, 112, 112
            nn.ReLU(True),
            nn.Conv2d(16, 32, 3, stride=2, padding=1),   # b, 32, 56, 56
            nn.ReLU(True),
            nn.Conv2d(32, 64, 7, stride=3, padding=1),   # b, 64, 18, 18
            nn.ReLU(True),
            nn.Conv2d(64, 128, 7, stride=5, padding=1),  # b, 128, 3, 3
            nn.ReLU(True),
            nn.Conv2d(128, 256, 3, stride=5, padding=1)  # b, 256, 1, 1
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, 3),                                        # b, 128, 3, 3
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, 7, stride=3, padding=1, output_padding=1),  # b, 64, 12, 12
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 24, 7, stride=2, padding=1, output_padding=1),   # b, 24, 28, 28
            nn.ReLU(True),
            nn.ConvTranspose2d(24, 16, 3, stride=2, padding=1, output_padding=1),   # b, 16, 56, 56
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 8, 3, stride=2, padding=1),                      # b, 8, 111, 111
            nn.ReLU(True),
            nn.ConvTranspose2d(8, 3, 5, stride=2, padding=1, output_padding=1),     # b, 3, 224, 224
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
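As a quick sanity check of the shapes, this sketch (assuming the autoencoder class above, on CPU with a dummy batch) matches the summary:

model = autoencoder()
dummy = torch.randn(1, 3, 224, 224)
latent = model.encoder(dummy)
recon = model.decoder(latent)
print(latent.shape)  # torch.Size([1, 256, 1, 1])
print(recon.shape)   # torch.Size([1, 3, 224, 224])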
and the training code is as follows:
dataset = DatasetLoader('E:/DAL/Dataset/Images', get_transform(train=True))
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-50])
dataset_test = torch.utils.data.Subset(dataset, indices[-50:])

data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=True, num_workers=0)
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=0)

model = autoencoder().cuda()
summary(model, (3, 224, 224))
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

total_loss = 0
for epoch in range(num_epochs):
    for data in data_loader:
        # print(data)
        img = data
        print("Min Value of input Image = ", torch.min(img))
        print("Max Value of input Image = ", torch.max(img))
        img = Variable(img).cuda()
        # ===================forward=====================
        output = model(img)
        print("Input Image shape = ", img.shape)
        print("Output Image shape = ", output.shape)
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    total_loss += loss.data
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch + 1, num_epochs, total_loss))
    if epoch % 10 == 0:
        pic = to_img(output.cpu().data)
        save_image(pic, './dc_img/image_{}.png'.format(epoch))

torch.save(model.state_dict(), './conv_autoencoder.pth')
The Dataset class and transform function are as follows:
def get_transform(train):
    transforms = []
    transforms.append(T.Resize((224, 244)))
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
        transforms.append(T.RandomVerticalFlip(0.5))
    transforms.append(T.ToTensor())
    return T.Compose(transforms)


class DatasetLoader(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(root)))

    def __getitem__(self, idx):
        img_path = os.path.join(self.root, self.imgs[idx])
        img = Image.open(img_path).convert("RGB")
        if self.transforms is not None:
            img = self.transforms(img)
        return img

    def __len__(self):
        return len(self.imgs)
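For context, the clustering step I have in mind is roughly this (a sketch assuming scikit-learn's KMeans and the model/data_loader defined above; n_clusters=5 is just a placeholder):

import numpy as np
from sklearn.cluster import KMeans

model.eval()
features = []
with torch.no_grad():
    for img in data_loader:
        z = model.encoder(img.cuda())            # latent code, shape [1, 256, 1, 1]
        features.append(z.flatten(1).cpu().numpy())

features = np.concatenate(features, axis=0)      # shape [N, 256]
kmeans = KMeans(n_clusters=5, random_state=0).fit(features)  # placeholder cluster count
print(kmeans.labels_)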