Hello,
I want to train a simple MLP of three layers using a table of data. The training data is such that the target is simply a multiplication of my three inputs.
target = x1 * x2 * x3
Here is the MLP network that I have defined using torch.nn:
class MLP(nn.Module):
def init(self, input_number, output_number, HLNN1, HLNN2):
super().init()
self.fc1 = nn.Linear(input_number, HLNN1)
self.fc2 = nn.Linear(HLNN1, HLNN2)
self.fc_out = nn.Linear(HLNN2, output_number)def forward(self, x): x = self.fc1(x) x = F.relu(x) x = self.fc2(x) x = F.tanh(x) x = self.fc_out(x) return x
model = MLP(1, 1, 30, 10)
BATCH_SIZE = 3
EVALUATION_PRECENT = 0.2
TRAINING_PRECENT = 1- EVALUATION_PRECENT
LR = .5
EPOCH_NUMBER = 50
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), LR)total_data = pd.read_excel(“d:\Data.xlsx”)
total_data_num = len(total_data)
training_data_num = int(TRAINING_PRECENT * total_data_num)
evaluation_data_num = total_data_num - training_data_numtotal_data = np.array(total_data)
total_data = torch.tensor(total_data, dtype=torch.float32)
train_data, eval_data = D.random_split(total_data, [training_data_num, evaluation_data_num])
train_data = train_data[:]
eval_data = eval_data[:]train_dataloader = D.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
eval_dataloader = D.DataLoader(eval_data, batch_size=BATCH_SIZE, shuffle=True)
def train_model(model, data_loader, loss_func, optimizer_func, epoch_num):
epoch_loss = 0
n = len(data_loader)
model.train()
for eopch in range(epoch_num+1):
epoch_loss = 0
for data in data_loader:
input_data = data[:, 0]
input_data = torch.reshape(input_data, (len(input_data),1))
target_data = data[:, 3]
prediction = model(input_data)
loss = loss_func(prediction, target_data)
optimizer_func.zero_grad()
loss.backward()
optimizer_func.step()
epoch_loss += loss
mean_loss = epoch_loss / n
print(f"EPOCH [{eopch:>02}]: Avrage Loss: {mean_loss:>.02}")train_model(model, train_dataloader, criterion, optimizer, EPOCH_NUMBER)
where F is torch.nn.functional module.
I have also written a function to train the model as follows:
I tried to train the above model using 1100 training data and the above training function. However, the average loss in each epoch is approximately 1e+13 and doesn’t decrease during the training process.
Could anyone help me please about this issue and also about what that may be wrong in my code?
Thank you,
Regards