I’ve been trying to develop an Autoencoder model for the task of knowledge representation where my input is a sequence of images. Loss of this model (both training and testing) doesn’t decrease. It is instead fluctuating and of course, my image reconstruction is very poor. I’ve tried the following:
- Changing the learning rate between 1 and 0.001
- Increasing and decreasing the batch size
- With and without dropout layers
Encoder-Decoder model
class Encoder_Decoder(nn.Module):
def __init__(self):
super(Encoder_Decoder, self).__init__()
#Encoder
self.encoder = nn.Sequential(nn.Conv3d(in_channels=3, out_channels=16, kernel_size=(3, 3, 3), padding=(0,0,0)),
nn.ReLU(), nn.BatchNorm3d(16),
nn.MaxPool3d(kernel_size=(1,2,2)),
nn.Conv3d(in_channels=16, out_channels=64, kernel_size=(3, 3, 3), padding=(0,1,1)),
nn.ReLU(), nn.BatchNorm3d(64),
nn.MaxPool3d(kernel_size=(2,2,2)),
nn.Conv3d(in_channels=64, out_channels=256, kernel_size=(3, 3, 3), padding=(1,1,1)),
nn.ReLU(), nn.BatchNorm3d(256),
nn.MaxPool3d(kernel_size=(2,2,2)),
nn.Conv3d(in_channels=256, out_channels=64, kernel_size=(3,3,3),padding=(1,1,1)),
nn.ReLU(), nn.BatchNorm3d(64),
nn.MaxPool3d(kernel_size=(2,2,2)),
nn.Conv3d(in_channels=64, out_channels=16, kernel_size=(1,1,1), padding=(0,0,0)),
nn.ReLU(), nn.BatchNorm3d(16),
nn.MaxPool3d(kernel_size=(2,1,1)),
nn.Conv3d(in_channels=16, out_channels=4, kernel_size=(1,1,1), padding=(0,0,0)),
nn.ReLU(), nn.BatchNorm3d(4),
nn.MaxPool3d(kernel_size=(1,1,1)))
#Decoder
self.decoder = nn.Sequential(nn.ConvTranspose3d(in_channels=4, out_channels=16, kernel_size=3),
nn.ReLU(),
nn.Upsample(scale_factor=(2,2,2)),
nn.ConvTranspose3d(in_channels=16, out_channels=64, kernel_size=3),
nn.ReLU(),
nn.Upsample(scale_factor=(2,2,2)),
nn.ConvTranspose3d(in_channels=64, out_channels=256, kernel_size=3),
nn.ReLU(),
nn.Upsample(scale_factor=(2,2,2)),
nn.ConvTranspose3d(in_channels=256, out_channels=64, kernel_size=3),
nn.ReLU(),
nn.Upsample(scale_factor=(2,2,2)),
nn.ConvTranspose3d(in_channels=64, out_channels=16, kernel_size=1),
nn.ReLU(),
nn.Upsample(size=(22,28,28)),
nn.ConvTranspose3d(in_channels=16, out_channels=3, kernel_size=1),
nn.ReLU(),
nn.Upsample(size=(22,28,28)))
def forward(self,x):
# x has the shapw (16,22,3,28,28)
x1 = self.encoder(x)
# x1 has the shape (16,4,1,1,1)
x2 = self.decoder(x1)
# x2 has the shape (16,22,3,28,28)
return x1, x2
Train:
def train(model, trainloader, criterion, optimizer, epoch):
    """Run one training epoch over ``trainloader``.

    Each batch is reconstructed by ``model``; ``criterion`` compares the
    reconstruction against the input and ``optimizer`` takes one step on
    that loss. Progress is logged every 50 batches.
    """
    model.train()
    for batch_idx, inputs in enumerate(trainloader):
        inputs = inputs.float()
        if torch.cuda.is_available():
            inputs = inputs.to("cuda")

        optimizer.zero_grad()
        code, outputs = model(inputs)
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer.step()

        if batch_idx % 50 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(inputs), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
test:
def test(model, criterion1, testloader):
model.eval()
test_loss = 0
for batch_idx, inputs in enumerate(testloader):
if torch.cuda.is_available():
inputs = inputs.cuda()
encoded_vectors,outputs = model(inputs)
loss = criterion(outputs,inputs)
test_loss += loss.item() * inputs.shape[0]
test_loss /= len(testloader.dataset)
print('\nTest set: Average loss: {:.4f}\n'.format(test_loss))
if abs(test_loss) <= 0.005:
return True
else:
return False
def main():
    """Build the autoencoder, train it for up to 20 epochs, return it.

    Relies on module-level ``train_loader`` / ``test_loader`` being
    defined elsewhere in the script.
    """
    model = Encoder_Decoder()
    # BUG FIX: the original unconditionally did model.to('cuda') and
    # crashed on CPU-only machines, even though train()/test() already
    # guard on torch.cuda.is_available(). Pick the device dynamically.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)
    criterion = torch.nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.005, eps=1e-3, amsgrad=False)
    for epoch in range(20):
        train(model, train_loader, criterion, optimizer, epoch)
        # test() returns True once the average loss is <= 0.005; the
        # original discarded that convergence signal -- stop early on it.
        if test(model, criterion, test_loader):
            break
    return model
# BUG FIX: the original read `if name == “main”:` — bare `name` is a
# NameError and the curly “smart quotes” are a SyntaxError. The correct
# script-entry guard uses the dunder module name.
if __name__ == "__main__":
    model = main()