Hey! I’m having some issues with a CNN model I’m trying to recreate from a paper, and I’m hoping somebody can help me out. No matter what I do, the model just doesn’t learn, and I’m not sure why. I’ve tried different optimizers (Adam and SGD with momentum), but the loss and accuracy values just don’t change at all. Here’s a small example of how the loss doesn’t change (it stays at ~2.080 and barely moves no matter the number of epochs):
Epoch 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:30<00:00, 0.53it/s, v_num=4, train_loss_step=2.070, val_loss=2.070, train_loss_epoch=3.170]
Epoch 1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:05<00:00, 2.97it/s, v_num=4, train_loss_step=2.080, val_loss=2.070, train_loss_epoch=2.080]
Epoch 2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:05<00:00, 2.77it/s, v_num=4, train_loss_step=2.060, val_loss=2.060, train_loss_epoch=2.080]
Epoch 3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:05<00:00, 2.77it/s, v_num=4, train_loss_step=2.060, val_loss=2.060, train_loss_epoch=2.080]
Epoch 4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:05<00:00, 2.77it/s, v_num=4, train_loss_step=2.060, val_loss=2.060, train_loss_epoch=2.080]
Here’s the model, for reference:
class Modelo(L.LightningModule):
    """VGG-style CNN classifier for single-channel images.

    The feature extractor stacks nine 3x3 convolutions with LeakyReLU
    activations and three 2x2 max-poolings; the classifier head is a
    three-layer MLP with dropout.  The first linear layer expects a
    256 x 5 x 5 feature map, so the input's spatial size must be reduced to
    5 x 5 by the three poolings (e.g. a 42 x 42 input).

    Args:
        num_classes: Number of output classes (size of the last layer and of
            the F1 metrics).
        lr: Learning rate handed to the Adam optimizer.
    """

    def __init__(self, num_classes=8, lr=0.001):
        super().__init__()
        self.lr = lr

        self.features = nn.Sequential(
            # Stage 1: 1 -> 32 channels, then halve the resolution.
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.05, inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.05, inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 2: 32 -> 64 channels, then halve the resolution.
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.05, inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.05, inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 3: 64 -> 128 channels, then halve the resolution.
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.05, inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.05, inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 4: 128 -> 256 channels, no pooling.
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.05, inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.05, inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.05, inplace=True),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(256 * 5 * 5, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(256, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, num_classes),
        )

        # Xavier-initialise every conv / linear weight (biases keep the
        # PyTorch default).  Done before the metrics are registered so the
        # traversal order is features first, then classifier.
        for layer in self.modules():
            if isinstance(layer, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_uniform_(layer.weight)

        # Metrics are registered as submodules so Lightning moves them to the
        # same device as the model and their state accumulates over an epoch,
        # instead of building a fresh metric object on every step.
        self.train_f1 = F1Score(task="multiclass", num_classes=num_classes)
        self.val_f1 = F1Score(task="multiclass", num_classes=num_classes)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, num_classes)."""
        x = self.features(x)
        x = x.flatten(1)
        x = self.classifier(x)
        # Explicit dim: the implicit-dimension form of log_softmax is
        # deprecated and emits a warning.
        return F.log_softmax(x, dim=1)

    def _loss_and_accuracy(self, batch):
        """Run the model on a batch and return (log_probs, loss, accuracy)."""
        x, y = batch
        log_probs = self(x)
        # forward() already applies log_softmax, so the matching loss is NLL
        # (cross_entropy would apply log_softmax a second time).
        loss = F.nll_loss(log_probs, y)
        # Kept as a tensor: calling .item() would force a device-to-host sync
        # on every step.
        accu = (log_probs.argmax(dim=1) == y).float().mean()
        return log_probs, loss, accu

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss, accuracy and F1; return the loss."""
        _, y = batch
        log_probs, loss, accu = self._loss_and_accuracy(batch)
        self.log('train_loss', loss, on_epoch=True, on_step=True, prog_bar=True)
        self.log('train_accu', accu, on_epoch=True, on_step=True, prog_bar=True)
        self.train_f1(log_probs, y)
        self.log('train_f1', self.train_f1, on_epoch=True, on_step=True, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """Return the Adam optimizer over all model parameters."""
        # Alternative that was tried:
        # optim.SGD(self.parameters(), lr=0.0001, momentum=0.9, weight_decay=0.0001)
        return optim.Adam(self.parameters(), lr=self.lr)

    def validation_step(self, val_batch, batch_idx):
        """Compute and log the validation loss, accuracy and F1."""
        _, y = val_batch
        log_probs, loss, accu = self._loss_and_accuracy(val_batch)
        self.log('val_loss', loss, on_epoch=True, on_step=False, prog_bar=True)
        self.log('val_accu', accu, on_epoch=True, on_step=True, prog_bar=True)
        self.val_f1(log_probs, y)
        self.log('val_f1', self.val_f1, on_epoch=True, on_step=True, prog_bar=True)
And training code:
# Training-time preprocessing: grayscale 42x42 images with random flip,
# rotation and crop augmentation.
train_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((42, 42)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(45),
    # NOTE(review): `scale` is a fraction of the source image area, so values
    # above 1.0 request a crop larger than the image -- confirm this range is
    # what the paper's "zoom" augmentation intends.
    transforms.RandomResizedCrop(42, scale=(0.875, 1.125), ratio=(1.0, 1.0)),
    transforms.ToTensor(),
])

# Validation-time preprocessing: same size and colour space, no augmentation.
val_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((42, 42)),
    transforms.ToTensor(),
])

train_dataset = ImageFolder(root=train_dataset_dir, transform=train_transform)
val_dataset = ImageFolder(root=val_dataset_dir, transform=val_transform)

batch_size = 128
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

model = module.Modelo()

# The monitored quantity is a loss, so lower is better: mode must be "min".
# With mode="max" a decreasing val_loss never counts as an improvement and
# training is stopped as soon as the patience runs out.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=0.001,
    patience=5,
    verbose=False,
    mode="min",
)

trainer = L.Trainer(
    accelerator="tpu",
    devices="auto",
    strategy="auto",
    callbacks=[early_stop_callback],
    max_epochs=2000,
    default_root_dir=model_dir,
)
trainer.fit(model, train_loader, val_loader)
I’m really lost here so if anyone could help me out I’d greatly appreciate it!