Hello everyone,
I want to implement a 1D Convolutional Autoencoder. The architecture is pretty simple (see the code).
The thing is I can’t manage to overfit on one sample. But when I use the “last_linear” layer, the model is able to overfit.
I understand that there are more parameters when using the “last_linear”, but shouldn’t the model be able to overfit even when not using this last linear layer?
Are there problems in the architecture? In the code?
Thanks for the help!
import numpy as np
import torch
from torch import nn
class Net(nn.Module):
    """1D convolutional autoencoder for inputs of shape (batch, dim, 1).

    Encoder: a stack of Conv1d -> ReLU -> MaxPool1d stages (each halving the
    length), a flatten, then a chain of Linear layers down to the latent code.
    Decoder: a mirrored chain of Linear layers, a reshape, then Upsample ->
    Conv1d stages back up to the original length.  Channel counts and linear
    widths are linearly interpolated between the endpoints.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.dim = 128                    # input sequence length
        self.nb_features_enc = 16         # channels at the deepest encoder conv
        self.nb_steps_features_enc = 5
        self.size_latent_space = 16
        self.nb_steps_linear_enc = 5
        self.nb_steps_linear_dec = 5
        self.nb_features_dec = 16
        self.nb_steps_features_dec = 5
        self.size_conv = 3                # kernel size for every conv layer

        # Interpolated channel/width schedules for each stage.
        enc_channels = list(np.linspace(
            1, self.nb_features_enc, self.nb_steps_features_enc).astype(int))
        # Each of the (nb_steps_features_enc - 1) pooling stages halves the length.
        self.input_linear = self.dim // 2 ** (self.nb_steps_features_enc - 1)
        self.size_view = self.input_linear * self.nb_features_enc
        enc_widths = list(np.linspace(
            self.size_view, self.size_latent_space, self.nb_steps_linear_enc).astype(int))
        dec_widths = list(np.linspace(
            self.size_latent_space, self.size_view, self.nb_steps_linear_dec).astype(int))
        dec_channels = list(np.linspace(
            self.nb_features_dec, 1, self.nb_steps_features_dec).astype(int))

        self.activation = nn.ReLU()

        # Encoder conv stages: Conv1d -> ReLU -> MaxPool1d(2).
        self.encoder_features = nn.ModuleList()
        for c_in, c_out in zip(enc_channels[:-1], enc_channels[1:]):
            self.encoder_features.append(
                nn.Conv1d(c_in, c_out, self.size_conv, padding=self.size_conv // 2))
            self.encoder_features.append(self.activation)
            self.encoder_features.append(nn.MaxPool1d(2))

        # Encoder linear stages: no activation after the final (latent) layer.
        self.encoder_linear = nn.ModuleList()
        n_lin_enc = len(enc_widths) - 1
        for k in range(n_lin_enc):
            self.encoder_linear.append(nn.Linear(enc_widths[k], enc_widths[k + 1]))
            if k != n_lin_enc - 1:
                self.encoder_linear.append(self.activation)

        # Decoder linear stages: activation after every layer.
        self.decoder_linear = nn.ModuleList()
        for w_in, w_out in zip(dec_widths[:-1], dec_widths[1:]):
            self.decoder_linear.append(nn.Linear(w_in, w_out))
            self.decoder_linear.append(self.activation)

        # Decoder conv stages: Upsample(x2) -> Conv1d; no activation after the
        # final conv so the reconstruction is unbounded.
        self.decoder_features = nn.ModuleList()
        n_conv_dec = len(dec_channels) - 1
        for k in range(n_conv_dec):
            self.decoder_features.append(nn.Upsample(scale_factor=2, mode='linear'))
            self.decoder_features.append(
                nn.Conv1d(dec_channels[k], dec_channels[k + 1], self.size_conv,
                          padding=self.size_conv // 2))
            if k != n_conv_dec - 1:
                self.decoder_features.append(self.activation)
        # self.last_linear = nn.Linear(self.dim, self.dim)

    def enc(self, x):
        """Encode a (batch, dim, 1) tensor into a (batch, size_latent_space) code."""
        x = x.permute(0, 2, 1)  # -> (batch, channels=1, length=dim)
        for layer in self.encoder_features:
            x = layer(x)
        x = torch.flatten(x, start_dim=1, end_dim=2)
        for layer in self.encoder_linear:
            x = layer(x)
        return x

    def dec(self, x):
        """Decode a (batch, size_latent_space) code back to (batch, dim, 1)."""
        for layer in self.decoder_linear:
            x = layer(x)
        # Reshape the flat vector into (batch, channels, length) for the convs.
        x = x.view(x.shape[0], -1, self.input_linear)
        for layer in self.decoder_features:
            x = layer(x)
        # x = self.last_linear(x)
        return x.permute(0, 2, 1)

    def forward(self, x):
        """Full autoencoder pass; output has the same shape as the input."""
        return self.dec(self.enc(x))
if __name__ == '__main__':
    # Overfitting sanity check: train the autoencoder on a single random
    # sample and watch the reconstruction loss.
    # Fix: select the device instead of hard-coding .cuda(), which crashes
    # on machines without a GPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    inputs = torch.randn([1, 128, 1], device=device)

    print()
    print(model)
    print()
    # Fix: removed the dangling `print('number of parameters:', )` that
    # printed a label with no value; the count is reported once below.
    nb_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('Number of learnable parameters: ', nb_parameters, '\n')

    for i in range(40000):
        optimizer.zero_grad()
        outputs = model(inputs)
        # Mean-squared reconstruction error against the single fixed sample.
        loss = torch.mean((inputs - outputs) ** 2)
        print('i:', i, ', loss:', loss.item())
        loss.backward()
        optimizer.step()