Hi guys, I want to find the best dropout value for my model, and I use k-fold cross-validation for it. Sometimes the model doesn't train at all on a fold, but if I retrain on the same fold with the same data and the same everything, the problem disappears.
The same thing happens if I change dropout to batchnorm.
Here is my training loop:
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import MultiStepLR
from tqdm import tqdm

def train_model(dataloader_train, drpt):
    model = TorchModel(drpt)
    model = model.to(Config.device)
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1.0e-2, weight_decay=1e-3)
    epochs = tqdm(range(70), leave=True)
    epochs.set_description("Epoch")
    epochs_loss = []
    # scheduler = StepLR(optimizer, step_size=40, gamma=0.1)
    scheduler = MultiStepLR(optimizer, milestones=[25], gamma=0.1)
    for epoch in epochs:
        running_loss = 0.0
        for data in dataloader_train:
            X = data["X"].to(Config.device)
            y = data["y"].to(Config.device)
            optimizer.zero_grad()
            preds = model(X)
            loss_value = loss(preds, y)
            loss_value.backward()
            optimizer.step()
            # note: CrossEntropyLoss already averages over the batch
            running_loss += loss_value.item() / y.shape[0]
        scheduler.step()
        epochs.set_postfix(
            epoch=epoch, loss=running_loss, lr=scheduler.get_last_lr()[0]
        )
        epochs_loss.append(loss_value.item())  # loss of the last batch in the epoch
    return model, epochs_loss
And my k-fold loop:
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from torch.utils.data import DataLoader

X_train, y_train = get_df(path_train)
columns = X_train.columns
kfolds = KFold(n_splits=5)
splits = kfolds.split(X_train, y_train)
result = []
for train_index, valid_index in splits:
    X_tr, X_val = X_train[columns].iloc[train_index], X_train[columns].iloc[valid_index]
    y_tr, y_val = y_train[train_index], y_train[valid_index]
    dataset_train = CustomDataset(X_tr, y_tr, 'Train')
    dataset_val = CustomDataset(X_val, y_val, 'Val')
    dataloader_train = DataLoader(dataset_train, batch_size=128, shuffle=True, num_workers=0, pin_memory=True)
    dataloader_val = DataLoader(dataset_val, batch_size=128, shuffle=False, num_workers=0, pin_memory=True)
    model, epochs_loss = train_model(dataloader_train, drpt)
    recall, y_pred_valid = get_recall(model, dataloader_val, y_val)
    cm = confusion_matrix(y_true=y_val, y_pred=y_pred_valid)
    result.append({
        'recall': recall,
        'cm': cm,
        'epochs_loss': epochs_loss,
        # 'model': model,
    })
And sometimes I get a result like this (note the third fold, where the loss stays high and recall collapses):
Epoch: 100%|██████████| 70/70 [00:57<00:00, 1.22it/s, epoch=69, loss=0.0194, lr=0.001]
tensor(0.9462)
Epoch: 100%|██████████| 70/70 [01:02<00:00, 1.11it/s, epoch=69, loss=0.0204, lr=0.001]
tensor(0.9627)
Epoch: 100%|██████████| 70/70 [00:58<00:00, 1.20it/s, epoch=69, loss=0.537, lr=0.001]
tensor(0.1491)
Epoch: 100%|██████████| 70/70 [00:55<00:00, 1.27it/s, epoch=69, loss=0.0262, lr=0.001]
tensor(0.9555)
Epoch: 100%|██████████| 70/70 [1:02:23<00:00, 53.48s/it, epoch=69, loss=0.0214, lr=0.001]
tensor(0.9493)
How can I fix this problem? Maybe I need gradient clipping or something similar?
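By gradient clipping I mean adding something like this between backward() and step() in my inner loop (max_norm=1.0 is just a guess, not a tuned value):

    loss_value.backward()
    # clip the global gradient norm before the optimizer step;
    # max_norm=1.0 is an arbitrary value that would need tuning
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()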
My model is simple; it consists of
Conv1d, ReLU, Dropout1d, … and a Linear layer at the end.
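Roughly like this (the channel sizes, kernel size, and output dimension here are placeholders, not my real values):

    import torch.nn as nn

    class TorchModel(nn.Module):
        def __init__(self, drpt):
            super().__init__()
            self.net = nn.Sequential(
                nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3),  # placeholder sizes
                nn.ReLU(),
                nn.Dropout1d(drpt),
                # ... more blocks like the above ...
                nn.Flatten(),
                nn.LazyLinear(2),  # placeholder number of classes
            )

        def forward(self, x):
            return self.net(x)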