I would like to train my autoencoder in a special way: first only the decoder (encoder fixed), and once that has finished, only the encoder (decoder fixed).
I set
model.fc1.weight.requires_grad = False
and train the decoder (the encoder is only a single layer). Everything is fine until the decoder finishes training. But when I switch the requires_grad flags
for p in model.parameters():
    p.requires_grad = False
model.fc1.weight.requires_grad = True
and define a new optimizer to train the encoder, I instantly get NaNs in my model parameters. Even after testing different things, I cannot figure out why.
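This is roughly how I confirm the NaNs; a minimal sketch (the has_nan helper is only for illustration), relying on the fact that NaN is the only value that compares unequal to itself:

# Minimal NaN check (illustrative helper, not part of the model code).
# NaN != NaN, so a nonzero mismatch count means a parameter went NaN.
def has_nan(model):
    return any((p.data != p.data).sum() > 0 for p in model.parameters())

print(has_nan(model))  # becomes True right after the first encoder update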
I attached my code, rewritten as a minimum working example.
import numpy as np
import numpy.random as rnd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.utils.data as data_utils
import torch.optim as optim
from torch.autograd import Variable
###########################################################
# Define model
###########################################################
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(100, 80, bias=False)  # encoder: single linear layer
        self.fc2 = nn.Linear(80, 100)  # decoder: fc2-fc5
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 100)
        self.fc5 = nn.Linear(100, 100)
        self.rl = nn.LeakyReLU(negative_slope=0.2)

    def forward(self, x):
        x = self.fc1(x)
        x = self.rl(self.fc2(x))
        x = self.rl(self.fc3(x))
        x = self.rl(self.fc4(x))
        x = self.fc5(x)
        return x
###########################################################
# Train Recovery
###########################################################
def train(epochs):
    epoch = 1
    while epoch <= epochs:
        # Train on train set
        train_loss = 0
        for batch_idx, (data, _) in enumerate(train_loader):
            data = Variable(data.type(torch.FloatTensor).cuda())
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, data)
            train_loss += loss
            loss.backward()
            optimizer.step()
        train_loss /= len_trainset
        train_error.append(train_loss.data[0])
        # Eval validation set
        val_loss = 0
        for data, _ in val_loader:
            data = Variable(data.type(torch.FloatTensor)).cuda()
            output = model(data)
            val_loss += criterion(output, data)
        val_loss /= len_valset
        val_error.append(val_loss.data[0])
        if epoch % 10 == 0:
            print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, train_loss.data[0]))
            print(' Test Epoch: {} \tLoss: {:.6f}\n'.format(epoch, val_loss.data[0]))
        epoch += 1
###########################################################
# Main
###########################################################
if __name__ == '__main__':
    train_error = []
    val_error = []
    len_trainset = 25000
    len_valset = 5000
    torch.manual_seed(0)
    rnd.seed(0)

    def sparse_data(N, k, num):
        # num signals of length N, each with exactly k nonzero Gaussian entries
        X = np.zeros((N, num))
        X[0:k, :] = rnd.normal(0, 1, size=(k, num))
        idx_1 = rnd.sample(X.shape).argsort(axis=0)  # random row permutation per column
        idx_2 = np.tile(np.arange(X.shape[1]), (X.shape[0], 1))
        return np.transpose(X[idx_1, idx_2])

    # Prepare data
    kwargs = {'num_workers': 1, 'pin_memory': True}
    X_train = sparse_data(100, 20, len_trainset)
    X_val = sparse_data(100, 20, len_valset)
    Strain = data_utils.TensorDataset(torch.from_numpy(X_train), torch.from_numpy(X_train))
    Sval = data_utils.TensorDataset(torch.from_numpy(X_val), torch.from_numpy(X_val))
    train_loader = data_utils.DataLoader(Strain, batch_size=128, shuffle=True, **kwargs)
    val_loader = data_utils.DataLoader(Sval, batch_size=128, shuffle=False, **kwargs)
    model = Net().cuda()
    model.train()

    # Train decoding part (encoder fc1 frozen)
    model.fc1.weight.requires_grad = False
    criterion = nn.MSELoss(size_average=False).cuda()
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
    train(150)
    # Train encoding part (decoder fc2-fc5 frozen)
    for p in model.parameters():
        p.requires_grad = False
    model.fc1.weight.requires_grad = True
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
    train(150)
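For reference, a quick way to see which parameters the second optimizer actually holds (a minimal sketch on top of the example above, not part of the original script):

# Sketch: list the parameters held by the current optimizer.
# Expect a single (80, 100) weight (fc1) with requires_grad=True.
for group in optimizer.param_groups:
    for p in group['params']:
        print(p.size(), p.requires_grad)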