I am new to pytorch. I am trying to mimic a code with the following structure. For simplification, let’s say I have an encoder that moves features X to the latent space (R) and a predictor to predict Y (calling it Y_hat).
my question is in the use of self.train() and self.eval ().
I feel like I am messing them up, but I am not sure how.
Could you please help me and see if what I have is correct?
I have the following structure:
I have encoder and predictor NN as separate classes.
here is the encoder:
class Encoder(nn.Module):
def __init__(self, input_dim , hidden_dim, encoded_dim, num_layer):
super(Encoder, self).__init__()
layers = []
for i in range(num_layer-1):
layers.append(nn.Linear(input_dim, hidden_dim))
layers.append(nn.Dropout(p=0.1))
layers.append(nn.ReLU())
input_dim=hidden_dim
if num_layer==1:
hidden_dim=input_dim
elif num_layer==2:
del layers[-1]
else: #>2
del layers[-2]
layers.append(nn.Linear(hidden_dim, encoded_dim))
self.main = nn.Sequential(*layers)
def forward(self, x):
R = self.main(x)
return R
here is predictor:
class Predictor(nn.Module):
def __init__(self, encoded_dim):
super(Predictor, self).__init__()
self.main = nn.Sequential(
nn.Linear(encoded_dim, 1),
nn.Sigmoid()
)
def forward(self, x):
Y_hat = self.main(x)
return Y_hat
I have the following class for the train and eval:
class MyClass(nn.Module):
def __init__(self, input_dim , hidden_dim, gamma, encoded_dim,num_layer LR=0.001):
super().__init__()
self.gamma = gamma
self.LR=LR
# Modules
self.encoder = Encoder(input_dim , hidden_dim, encoded_dim, num_layer)
self.predictor = Predictor(encoded_dim)
# Optimizers
self.enc_optimizer = torch.optim.Adam(self.encoder.parameters(), lr = LR, weight_decay = 1e-8)
self.pred_optimizer = torch.optim.Adam(self.predictor.parameters(), lr = LR, weight_decay = 1e-8)
# Loss functions
self.prediction_loss = nn.BCELoss()
def train(self, dataloader:DataLoader) -> DataLoader:
self.train()
for j,T in enumerate(dataloader, 0):
X,Y,=T[0],T[1]
R = self.encoder(X)
Y_hat = self.predictor(R)
# loss for the predictor
E = self.prediction_loss(Y_hat, Y.unsqueeze(1))
L = self.gamma*E
# Gradient steps
L.backward()
self.enc_optimizer.step()
self.pred_optimizer.step()
self.pred_optimizer.zero_grad()
self.enc_optimizer.zero_grad()
return L.item()
def evaluate(self,loader: DataLoader):
self.eval()
with torch.inference_mode():
for i, data in enumerate(loader):
X,Y,=data[0],data[1]
R = self.encoder(X)
Y_hat = self.predictor(R).squeeze(1) > 0.5
# loss for the reconstruction
E = self.prediction_loss(self.predictor(R), Y.unsqueeze(1))
L = self.gamma*E
total += Y.size(0)
results = (Y_hat == Y)
correct += results.sum()
y_acc = correct / total
return y_acc, L.item()
Now to run the algorithm, I have the following lines:
set_seed(config['seed'])
model_1 = MyClass(input_dim=config["input_dim_level_1"], hidden_dim=config["hidden_dim_level_1"], encoded_dim=config["encoded_dim_level_1"], num_layer=config['num_layer_level_1'],LR=config['learning_rate'],gamma=config['gamma'])
epochs=config['epoch_num']
for epoch in range(epochs):
Loss_train=model_1.train(epoch,dataloader=train_loader])
y_acc_valid, loss_valid=model_1.evaluate(epoch,validation_loader)
# freeze weights for model_1
for param in model_1.parameters():
param.requires_grad = False
y_acc_test, loss_test=model_1.evaluate(epoch,test_loader,filename=output_filename_eval)