Getting same result in each Epoch

McsLk · May 25, 2019, 5:44pm

Hello everyone, my name is Marcin and I’m quite new to Deep Learning. I tried to develop my first model in PyTorch based on the Pneumonia analysis dataset. I’m curious why I’m getting the same result in each epoch during the training and validation sessions. Would someone mind explaining my mistakes to me so that I can improve my knowledge? Thanks!

# Root folders of the training and validation image splits.
train_data_path = './chest_xray/train/'
test_data_path = './chest_xray/val/'

# Training split: the same `transform` is applied to every image and the
# batches are reshuffled on each pass over the loader.
train_dataset = torchvision.datasets.ImageFolder(root=train_data_path, transform=transform)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=0
)

# Validation split: same transform, but iterated in a fixed order.
test_dataset = torchvision.datasets.ImageFolder(root=test_data_path, transform=transform)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, num_workers=0
)

# Phase name -> loader, as looked up by the training loop.
dataloaders = dict(train=train_loader, validation=test_loader)

class MultilayerNeuralNet(nn.Module):
    """Two conv/pool stages followed by a four-layer fully connected head.

    The network relies on module-level settings defined elsewhere in the
    script: ``channels``, ``hight`` and ``width`` (shape of a single input
    image) and ``conv_size`` (number of features per sample after the second
    pooling stage, i.e. the input width of ``fc1``).

    Args:
        input_size: Accepted for compatibility with existing callers. The
            layer sizes are taken from ``conv_size``, so this value is not
            used.
    """

    def __init__(self, input_size):
        super(MultilayerNeuralNet, self).__init__()
        # hidden layers sizes, you can play with it as you wish!
        hidden1 = 64
        hidden2 = 32
        hidden3 = 16
        self.pool = nn.MaxPool2d(2, 2)
        self.conv1 = nn.Conv2d(3, 8, 3)
        self.conv2 = nn.Conv2d(8, 16, 3)
        self.fc1 = nn.Linear(conv_size, hidden1)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden2, hidden3)
        self.relu3 = nn.ReLU()
        self.output = nn.Linear(hidden3, 2)

    def forward(self, x):
        """Compute the network output for a batch, layer after layer.

        Args:
            x: Batch of images in any layout that can be viewed as
                ``(N, channels, hight, width)``, e.g. flattened rows.

        Returns:
            Tensor of shape ``(N, 2)`` holding the raw scores produced by the
            last linear layer.
        """
        # Infer the batch dimension instead of hard-coding ``batch_size``:
        # the last batch of an epoch is smaller whenever the dataset size is
        # not a multiple of the batch size, and a fixed view would raise.
        x = x.view(-1, channels, hight, width)
        # After the first Conv2d + pooling stage.
        state = self.pool(F.relu(self.conv1(x)))
        # After the second Conv2d + pooling stage.
        state = self.pool(F.relu(self.conv2(state)))
        # Flatten per sample before the linear layers. Keeping the batch
        # dimension explicit makes a wrong ``conv_size`` fail in ``fc1``
        # rather than silently mixing samples together.
        state = state.view(state.size(0), -1)
        state = self.fc1(state)
        state = self.relu1(state)
        state = self.fc2(state)
        state = self.relu2(state)
        state = self.fc3(state)
        state = self.relu3(state)
        state = self.output(state)
        return state

# Build the network and move it to the target device, then create the
# cross-entropy loss and an Adam optimizer over all of the model's parameters.
model = MultilayerNeuralNet(input_size)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Training / validation loop, exactly as originally posted.
def train_model(model, criterion, optimizer, num_epochs):
    """Run ``num_epochs`` epochs over the loaders in ``dataloaders``.

    NOTE(review): as written, the batch loop below sits at the same
    indentation level as the ``for phase`` loop instead of inside it. It
    therefore runs once per epoch, after the phase loop has finished, with
    ``phase`` left at ``'validation'``. Only the validation loader is
    iterated and the ``phase == 'train'`` branch (and with it the optimizer
    step) is never executed, so the model's weights never change.

    Args:
        model: Network to run; moved to ``device`` first.
        criterion: Loss function called as ``criterion(prediction, labels)``.
        optimizer: Optimizer intended to update the model in the train phase.
        num_epochs: Number of epochs to run.
    """
    liveloss = PlotLosses()
    model = model.to(device)
    
    for epoch in range(num_epochs):
        logs = {}
        for phase in ['train', 'validation']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            correct = 0
            epoch_loss = 0
            
            
        # NOTE(review): this loop is outside the phase loop (see docstring),
        # so `phase` is always 'validation' here.
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.reshape(-1, input_size).to(device)
            labels = labels.to(device)
            prediction = model(inputs)
            loss = criterion(prediction, labels)
            
            if phase == 'train':
                optimizer.zero_grad()
                loss.backward()
                optimizer.step() 

            # Predicted class = index of the largest score in each row.
            _, predicted = torch.max(prediction, 1)
            # Weight the batch loss by the batch size before accumulating.
            epoch_loss += loss.detach() * inputs.size(0)
            correct += torch.sum(predicted == labels.data)
        
        # NOTE(review): the loss is divided by the training-set size and the
        # accuracy by the validation-set size, regardless of which loader
        # was actually iterated above.
        epoch_loss = epoch_loss / len(train_loader.dataset)
        acc = correct.float() / len(test_loader.dataset)
        
        
        prefix = ''
        if phase == 'validation':
            prefix = 'val_'
            
        # NOTE(review): `logs` is filled here but never handed to `liveloss`.
        logs[prefix + 'log loss'] = epoch_loss.item()
        logs[prefix + 'accuracy'] = acc.item()
        print(f'Epoch [{epoch+1}/{num_epochs}]], Loss: {epoch_loss:.4f} Test acc: {acc}')

train_model(model, criterion, optimizer, num_epochs)

The output of the last block is:

Epoch [1/50]], Loss: 0.0022 Test acc: 0.5
Epoch [2/50]], Loss: 0.0022 Test acc: 0.5
Epoch [3/50]], Loss: 0.0022 Test acc: 0.5
Epoch [4/50]], Loss: 0.0022 Test acc: 0.5
Epoch [5/50]], Loss: 0.0022 Test acc: 0.5
Epoch [6/50]], Loss: 0.0022 Test acc: 0.5
Epoch [7/50]], Loss: 0.0022 Test acc: 0.5
Epoch [8/50]], Loss: 0.0022 Test acc: 0.5
Epoch [9/50]], Loss: 0.0022 Test acc: 0.5
Epoch [10/50]], Loss: 0.0022 Test acc: 0.5

ptrblck · May 27, 2019, 4:06pm

It looks like the indentation of your code might be wrong and you are not training the model at all.
I’m not sure if this issue was introduced when you pasted the code into the board, but the dataloader for loop should be inside the phase loop.
If you are using the same code as posted here, you’ll just perform the evaluation without training. Could you check that?

McsLk · May 27, 2019, 4:36pm

Thank you for your answer! Could you specify exactly which part of the code you’re talking about? The loop containing the inputs and labels variables?

ptrblck · May 27, 2019, 4:50pm

I think it’s easier to comment in the code:

def train_model(model, criterion, optimizer, num_epochs):
    """Train and validate ``model`` for ``num_epochs`` epochs.

    Each epoch runs a ``'train'`` phase followed by a ``'validation'`` phase
    over the matching loader in the module-level ``dataloaders`` dict. One
    line with the mean loss and the accuracy is printed per phase.

    Args:
        model: Network to optimise; moved to ``device`` first.
        criterion: Loss function called as ``criterion(prediction, labels)``.
        optimizer: Optimizer stepped once per training batch.
        num_epochs: Number of epochs to run.
    """
    liveloss = PlotLosses()
    model = model.to(device)

    for epoch in range(num_epochs):
        logs = {}
        for phase in ['train', 'validation']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            # Plain Python accumulators, so the statistics below also work
            # when a loader yields no batches.
            running_loss = 0.0
            correct = 0
            num_seen = 0

            # This part was on the same indentation level as the outer loop
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.reshape(-1, input_size).to(device)
                labels = labels.to(device)

                # Only track gradients while training; validation batches do
                # not need an autograd graph.
                with torch.set_grad_enabled(is_train):
                    prediction = model(inputs)
                    loss = criterion(prediction, labels)

                if is_train:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # Predicted class = index of the largest score in each row.
                _, predicted = torch.max(prediction, 1)
                batch_count = labels.size(0)
                # The criterion's batch loss is weighted by the batch size,
                # so dividing by the sample count below averages per sample.
                running_loss += loss.item() * batch_count
                correct += (predicted == labels).sum().item()
                num_seen += batch_count

            # Normalise by the samples processed in THIS phase. Dividing the
            # loss by the training-set size and the accuracy by the
            # validation-set size skews both whenever the splits differ.
            denom = max(num_seen, 1)
            epoch_loss = running_loss / denom
            acc = correct / denom

            prefix = 'val_' if phase == 'validation' else ''

            # NOTE(review): `logs` is filled but never handed to `liveloss`;
            # presumably an update/draw call is intended after both phases.
            # Confirm against the livelossplot version in use.
            logs[prefix + 'log loss'] = epoch_loss
            logs[prefix + 'accuracy'] = acc
            print(f'Epoch [{epoch+1}/{num_epochs}]], Loss: {epoch_loss:.4f} Test acc: {acc}')

Could you try this code and check, if your model starts to learn something?

McsLk · May 27, 2019, 4:51pm

Thank you, I’ll check it soon.

edit: Thanks, it worked!