Hey.
Recently, I used an autoencoder to obtain a hidden representation of the original input x. I created a class named AEFactory to build several similar but slightly different models:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Linear

class AEFactory(nn.Module):
    def __init__(self, n_enc_1, n_enc_2, n_enc_3, n_dec_1, n_dec_2, n_dec_3,
                 n_input, n_z, n_topic=None, deliver=False):
        super(AEFactory, self).__init__()
        # encoder
        self.enc_1 = Linear(n_input, n_enc_1)
        self.enc_2 = Linear(n_enc_1, n_enc_2)
        self.enc_3 = Linear(n_enc_2, n_enc_3)
        self.z_layer = Linear(n_enc_3, n_z)
        # decoder
        self.dec_1 = Linear(n_z, n_dec_1)
        self.dec_2 = Linear(n_dec_1, n_dec_2)
        self.dec_3 = Linear(n_dec_2, n_dec_3)
        self.x_bar_layer = Linear(n_dec_3, n_input)
        # optional classification head on top of z
        if n_topic is not None:
            self.classifier_layer = Linear(n_z, n_topic)
        else:
            self.classifier_layer = None
        # when deliver=True, also return the intermediate encoder activations
        self.deliver = deliver

    def forward(self, x):
        enc_h1 = F.relu(self.enc_1(x))
        enc_h2 = F.relu(self.enc_2(enc_h1))
        enc_h3 = F.relu(self.enc_3(enc_h2))
        z = self.z_layer(enc_h3)
        dec_h1 = F.relu(self.dec_1(z))
        dec_h2 = F.relu(self.dec_2(dec_h1))
        dec_h3 = F.relu(self.dec_3(dec_h2))
        x_bar = self.x_bar_layer(dec_h3)
        if self.classifier_layer is not None:
            c = self.classifier_layer(F.relu(z))
            c = F.log_softmax(c, dim=1)
            if self.deliver:
                return x_bar, enc_h1, enc_h2, enc_h3, z, c
            return x_bar, z, c
        if self.deliver:
            return x_bar, enc_h1, enc_h2, enc_h3, z
        return x_bar, z
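So depending on n_topic and deliver, forward returns tuples of different lengths. A quick illustration (the sizes 784 and 10 below are just placeholders I made up for the example):

m1 = AEFactory(500, 500, 2000, 2000, 500, 500, n_input=784, n_z=10, n_topic=None)
m2 = AEFactory(500, 500, 2000, 2000, 500, 500, n_input=784, n_z=10, n_topic=30)
x = torch.randn(4, 784)
print(len(m1(x)))  # 2 -> (x_bar, z)
print(len(m2(x)))  # 3 -> (x_bar, z, c)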
But when I instantiate model 1:
model = AEFactory(500,
                  500,
                  2000,
                  2000,
                  500,
                  500,
                  n_input=n_input,
                  n_topic=None,
                  n_z=n_z).to(device)
and model 2:
model = AEFactory(500,
                  500,
                  2000,
                  2000,
                  500,
                  500,
                  n_input=n_input,
                  n_topic=30,
                  n_z=n_z).to(device)
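For what it's worth, one sanity check I thought of (assuming the same manual seed, here 0 as an arbitrary example, is set before building each model) is whether the layers the two models share even start from the same weights:

torch.manual_seed(0)
m1 = AEFactory(500, 500, 2000, 2000, 500, 500, n_input=n_input, n_topic=None, n_z=n_z)
torch.manual_seed(0)
m2 = AEFactory(500, 500, 2000, 2000, 500, 500, n_input=n_input, n_topic=30, n_z=n_z)
# compare the initial weights of a couple of shared layers
print(torch.equal(m1.enc_1.weight, m2.enc_1.weight))
print(torch.equal(m1.x_bar_layer.weight, m2.x_bar_layer.weight))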
and train both of them with essentially the same pre-training procedure:
from torch.optim import Adam
from torch.utils.data import DataLoader

def train_ae(model, pre_epoch, x, lr, pretrain_path):
    dataset = SimpleData(x)
    train_loader = DataLoader(dataset, batch_size=256, shuffle=True)
    optimizer = Adam(model.parameters(), lr=lr)
    print("start pre_train")
    for epoch in range(pre_epoch):
        for batch_idx, batch_x in enumerate(train_loader):
            batch_x = batch_x.to(device)
            optimizer.zero_grad()
            # model 2 returns (x_bar, z, c); for model 1 the unpacking is
            # "x_bar, z = model(batch_x)" instead
            x_bar, z, _ = model(batch_x)
            loss = F.mse_loss(x_bar, batch_x)  # reconstruction loss only
            loss.backward()
            optimizer.step()
        print("epoch {} loss={:.4f}".format(epoch, loss.item()))
    torch.save(model.state_dict(), pretrain_path)
    print("model saved to {}.".format(pretrain_path))
    print("end_pre_train")
the two models end up with different losses.
In my opinion they have the same structure as far as the reconstruction path is concerned: although model 2 adds an extra linear (classifier) layer, that layer's parameters are not used in the loss.
Why?
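To back up the claim that the classifier parameters are not touched by the reconstruction loss, one could inspect the gradients after a single backward pass (a minimal sketch, assuming a freshly built model 2 and a random batch of size 8):

batch = torch.randn(8, n_input).to(device)
x_bar, z, c = model(batch)
loss = F.mse_loss(x_bar, batch)
loss.backward()
print(model.classifier_layer.weight.grad)   # None: no gradient flows into the classifier head
print(model.enc_1.weight.grad is not None)  # True: the encoder does receive gradients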