I built a custom CNN architecture, but when I train it the validation accuracy and loss do not improve, and the training accuracy improves only slightly. I also tried a pretrained model, and it trains fine.
This is the architecture below.
class Custom(nn.Module):
    """Six-stage convolutional image classifier.

    Every stage is ``Conv2d(3x3, stride 1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(4, 2, 1)``. Channel widths go 3 -> 128 -> 256 -> 256 -> 512 ->
    1024 -> 1024. The last feature map is averaged down to 1x1 and fed to a
    small fully-connected head that produces ``num_classes`` logits.

    Args:
        num_classes: Number of logits emitted by the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Only conv1 is padded; each of the remaining convolutions trims
        # 2 pixels from every spatial dimension.
        self.conv1 = nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=2)
        self.conv2 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=0)
        self.conv3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=0)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=0)
        self.conv5 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=0)
        self.conv6 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=0)

        # Attribute names are kept so that existing state_dict keys still load.
        self.bn_relu_max1 = self._bn_relu_pool(128)
        self.bn_relu_max2 = self._bn_relu_pool(256)
        self.bn_relu_max3 = self._bn_relu_pool(256)
        self.bn_relu_max4 = self._bn_relu_pool(512)
        self.bn_relu_max5 = self._bn_relu_pool(1024)
        self.bn_relu_max6 = self._bn_relu_pool(1024)

        self.adaptiveavgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # The tensor is 2-D (batch, features) after Flatten, so plain
            # element-wise Dropout is the right layer; Dropout2d is meant for
            # 3-D/4-D feature maps and warns on 2-D input.
            nn.Dropout(0.5),
            nn.Linear(1024, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    @staticmethod
    def _bn_relu_pool(channels):
        """Build the BatchNorm -> ReLU -> MaxPool stack that follows a convolution."""
        return nn.Sequential(
            nn.BatchNorm2d(channels),
            nn.ReLU(),
            nn.MaxPool2d(4, 2, 1),
        )

    def forward(self, input):
        """Return the ``num_classes`` logits for a batch of 3-channel images."""
        stages = (
            (self.conv1, self.bn_relu_max1),
            (self.conv2, self.bn_relu_max2),
            (self.conv3, self.bn_relu_max3),
            (self.conv4, self.bn_relu_max4),
            (self.conv5, self.bn_relu_max5),
            (self.conv6, self.bn_relu_max6),
        )
        x = input
        for conv, bn_relu_pool in stages:
            x = bn_relu_pool(conv(x))
        x = self.adaptiveavgpool(x)
        return self.classifier(x)
# Single switch for mixed precision; read by the gradient scaler below.
use_amp = True

criterion = nn.CrossEntropyLoss()
optimizer = bnb.optim.Adam8bit(model.parameters(), lr=0.001)
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

# Multiply the learning rate by 0.1 once the monitored value (passed to
# ``lr_scheduler.step``) has failed to decrease for more than 2 steps.
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=2,
)
The training and validation loops are below.
# Length of the training loader (presumably the number of batches per epoch — confirm loader type).
total_step = len(loader_train)
def calculate_accuracy(y_pred, y):
    """Return the fraction of samples whose arg-max along dim 1 equals the label.

    Args:
        y_pred: Score tensor; the predicted class is its arg-max over dim 1.
        y: Label tensor, viewed to the shape of the arg-max result before comparing.

    Returns:
        A float tensor: number of matches divided by ``y.shape[0]``.
    """
    predicted = y_pred.argmax(dim=1, keepdim=True)
    matches = predicted == y.view_as(predicted)
    return matches.sum().float() / y.shape[0]
def train(model, iterator, optimizer, criterion, device):
    """Run one mixed-precision training epoch.

    Relies on the module-level ``use_amp`` flag, ``scaler`` (gradient scaler),
    ``autocast``, ``tqdm`` and ``calculate_accuracy``.

    Args:
        model: Module to train; switched to training mode.
        iterator: Iterable of ``(x, y)`` batches that also supports ``len()``.
        optimizer: Optimizer stepped through the gradient scaler.
        criterion: Loss callable invoked as ``criterion(y_pred, y)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, mean_accuracy)``, each averaged over ``len(iterator)``.

    Side effects:
        Resets ``train.lrs`` and appends the first param group's learning
        rate once per batch.
    """
    epoch_loss = 0
    epoch_acc = 0
    train.lrs = []

    model.train()
    for (x, y) in tqdm(iterator, desc="Training", leave=False):
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        # Only the forward pass and the loss run under autocast; the backward
        # pass is kept outside the context, as the AMP docs recommend.
        with autocast(enabled=use_amp):
            y_pred = model(x)
            loss = criterion(y_pred, y)

        acc = calculate_accuracy(y_pred, y)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # .item() forces a host sync, so read the loss once per batch.
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        train.lrs.append(optimizer.param_groups[0]['lr'])

    return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate(model, iterator, criterion, device):
    """Evaluate the model for one pass and step the plateau LR scheduler once.

    Relies on the module-level ``lr_scheduler``, ``optimizer``, ``tqdm`` and
    ``calculate_accuracy``.

    Args:
        model: Module to evaluate; switched to eval mode.
        iterator: Iterable of ``(x, y)`` batches that also supports ``len()``.
        criterion: Loss callable invoked as ``criterion(y_pred, y)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, mean_accuracy)``, each averaged over ``len(iterator)``.

    Side effects:
        Calls ``lr_scheduler.step`` with the mean loss, then resets
        ``evaluate.lrs_val`` to hold the resulting learning rate of the
        first param group.
    """
    epoch_loss = 0
    epoch_acc = 0
    evaluate.lrs_val = []

    model.eval()
    with torch.no_grad():
        for (x, y) in tqdm(iterator, desc="Evaluating", leave=False):
            x = x.to(device)
            y = y.to(device)

            y_pred = model(x)
            loss = criterion(y_pred, y)
            acc = calculate_accuracy(y_pred, y)

            epoch_loss += loss.item()
            epoch_acc += acc.item()

    mean_loss = epoch_loss / len(iterator)

    # Step the scheduler exactly once per evaluation pass, on the mean loss.
    # ReduceLROnPlateau counts `patience` in calls to step(), so stepping per
    # batch would shrink the learning rate after only a few validation batches.
    lr_scheduler.step(mean_loss)
    evaluate.lrs_val.append(optimizer.param_groups[0]['lr'])

    return mean_loss, epoch_acc / len(iterator)
EPOCHS = 10
best_valid_loss = float('inf')

for epoch in trange(EPOCHS, desc="Epochs"):
    start_time = time.monotonic()

    train_loss, train_acc = train(
        model=model,
        iterator=loader_train,
        optimizer=optimizer,
        criterion=criterion,
        device=device,
    )
    valid_loss, valid_acc = evaluate(
        model=model,
        iterator=loader_valid,
        criterion=criterion,
        device=device,
    )

    # Keep a checkpoint of the weights with the lowest validation loss so far.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut3-model.pt')

    # The timer stops after the optional save, so checkpointing is included.
    end_time = time.monotonic()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
The dataset has 5 classes. I have performed data augmentation using albumentations to tackle class imbalance. I am not sure why this is happening. Any help would be appreciated.