Pretrained resnet50 is displaying weird behaviour

Hi, I am using the ESC-50 dataset and I converted all the audio files into mel spectrograms. I have been using resnet50 as a pretrained network with a header network that has a hidden and an output layer. The network is displaying strange behaviour: the test accuracy is bouncing randomly around the 2% mark, while the loss is dropping from 419 to 3.91, which is weird, as the loss should be starting at 3.91.
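For reference, that expected starting loss is just the cross-entropy of uniform random predictions over 50 classes:

import math
# Cross-entropy of uniform random guessing over 50 classes
print(-math.log(1 / 50))  # ~3.91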

I have included the code for my model below:

import torch
from torch import nn
from torch.nn import functional as F
from torchvision import models, transforms
from d2l import torch as d2l

class Header(nn.Module):
    """Classification head: dropout -> linear -> ReLU -> dropout -> linear."""
    def __init__(self, input_nums, hidden_nums, output_nums, dropout, **kwargs):
        super(Header, self).__init__(**kwargs)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(input_nums, hidden_nums)
        self.relu = nn.ReLU()
        self.lin2 = nn.Linear(hidden_nums, output_nums)
    def forward(self, X):
        X = self.dropout(self.relu(self.linear(self.dropout(X))))
        return self.lin2(X)

config = {
    "epochs": 100,
    "lr": 0.03,
    "batch_size": 32,
    "classes": 50,
    "architecture": "resnet50",
    "architectures": {"resnet18": models.resnet18, "resnet50": models.resnet50,
                      "inception_v3": models.inception_v3},
    "datasets": "esc50",
    "pretrained": True,
    "header": Header,  # defined above so the dict can reference it
    "custom_params": None,
    "device": d2l.try_all_gpus()
}
class ResModel(nn.Module):
    """Wraps a backbone: resizes the spectrogram, tiles it to 3 channels,
    then crops and normalizes with the ImageNet statistics."""
    def __init__(self, model, size=431):
        super(ResModel, self).__init__()
        self.res = model
        self.preprocess = transforms.Compose([
            transforms.RandomCrop(224),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        self.f = F.interpolate
        self.size = size
    def forward(self, X):
        X = self.f(X, size=self.size)  # resize the (N, 1, H, W) input to size x size
        X = X.repeat(1, 3, 1, 1)       # tile the single channel to the 3 channels resnet expects
        return self.res(self.preprocess(X))
def make_model(config):
    assert config["architecture"] in config["architectures"], "Model not found"
    if hasattr(models, config["architecture"]):
        # Load the pretrained backbone and freeze all of its parameters
        net = config["architectures"][config["architecture"]](pretrained=config["pretrained"])
        for param in net.parameters():
            param.requires_grad = False
        if config["header"] is not None:
            #TODO: Change this so that the input_nums is calculated automatically
            net.fc = config["header"](2048, 4096, config["classes"], 0.1)
        net = ResModel(net)
    else:
        net = config["architectures"][config["architecture"]](config["custom_params"])
    # init_weights is defined elsewhere; note that apply() visits every submodule,
    # including the frozen pretrained backbone
    net.apply(init_weights)
    return net.to(device=config["device"][0])
def make(config):
    # Load training and test data (load_fold_mels and path are defined elsewhere)
    train_iter, test_iter = load_fold_mels(path, config["batch_size"])
    # CELoss for classification tasks
    loss = nn.CrossEntropyLoss(reduction='none')
    # Get our model from the config
    net = make_model(config)
    # Use the Adam optimizer
    optimizer = torch.optim.Adam(net.parameters(), lr=config["lr"])

    return net, train_iter, test_iter, loss, optimizer
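As an aside on the TODO above: one way to avoid hard-coding 2048 would be to read the input size off the backbone's existing fc layer before replacing it, and, since the backbone is frozen, to hand Adam only the parameters that are still trainable. A rough sketch using the same names as above (untested):

# Derive the header's input size from the backbone instead of hard-coding it
in_features = net.fc.in_features  # 2048 for resnet50, 512 for resnet18
net.fc = config["header"](in_features, 4096, config["classes"], 0.1)

# Pass only the trainable (header) parameters to the optimizer
optimizer = torch.optim.Adam(
    (p for p in net.parameters() if p.requires_grad), lr=config["lr"]
)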

I guess you’ve calculated this expected initial loss by assuming random predictions?
If so, I don’t think you can assume that a pretrained model will output random classes (a randomly initialized one should).
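A quick way to check is to evaluate the model on one batch before any training; a pretrained backbone with a fresh random head has no particular reason to land exactly at the chance-level value. A rough sketch using the names from your code (untested):

device = config["device"][0]
net.eval()  # disable dropout for the check
with torch.no_grad():
    X, y = next(iter(test_iter))
    initial_loss = loss(net(X.to(device)), y.to(device)).mean()
print(initial_loss.item())  # ~3.91 only if the outputs are effectively random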
