Training not working

Prakyath_Kantharaju · August 30, 2021, 1:22am

I am trying to train an activity recognition system using PyTorch, but the network is not training and loss is not dropping, even though I have a similar model working perfectly on Keras. I have provided code for the training loop, model class, and dataset class here. Can you help me why the loss is not dropping (accuracy is not increasing)

main training loop

# create dataset
dataset = IMU_dataset()


train_loader = DataLoader(dataset=dataset,
                          batch_size=40,
                          shuffle=True,
                          num_workers=2)


num_epochs = 100
total_samples = len(dataset)
n_iterations = math.ceil(total_samples/4)
print(total_samples, n_iterations)
input_shape = 3
output_index = 6
device = torch.device('cpu')
model = HARmodel(input_shape, output_index).to(device)
model.float()
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1) 
 
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        # origin shape: [40, 3, 400]
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(inputs.to(device).float())
        loss = criterion(outputs, labels.long())
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
#         if (i+1) % 5 == 0:
#             print(f'loss: {loss.item()}')
    print(model.calculate_accuracy(dataset.x_data, dataset.y_data), model.calculate_loss(dataset.x_data, dataset.y_data, criterion))

Here is the model class.

class HARmodel(nn.Module):
    """Model for human-activity-recognition."""
    def __init__(self, input_size, num_classes):
        super().__init__()

        # Extract features, 1D conv layers
        self.layer_1   = nn.Conv1d(input_size, 100, 10, stride=1)
        self.activation_relu = nn.ReLU()
        self.layer_2 = nn.Conv1d(100, 100, 10, stride=1)
        self.layer_3 = nn.Conv1d(100, 100, 10, stride=1)
        self.layer_4 = nn.MaxPool1d(2, stride=3)
        self.layer_5 = nn.Dropout(p=0.2)
        self.layer_6 = nn.Conv1d(100, 160, 10, stride=1)
        self.layer_7 = nn.Conv1d(160, 160, 10, stride=1)
        self.layer_8 = nn.Conv1d(160, 160, 10, stride=1)
#         self.layer_9 = nn.AvgPool1d(97)
        self.layer_10 = nn.Dropout(p=0.5)
        self.layer_11 = nn.Linear(160, 6)
        self.activation_softmax = nn.Softmax()


    def forward(self, x):
        x = self.layer_1(x)
        x = self.activation_relu(x)
        x = self.layer_2(x)
        x = self.activation_relu(x)
        x = self.layer_3(x)
        x = self.activation_relu(x)
        x = self.layer_4(x)
        x = self.layer_5(x)
        x = self.layer_6(x)
        x = self.activation_relu(x)
        x = self.layer_7(x)
        x = self.activation_relu(x)
        x = self.layer_8(x)
        x = self.activation_relu(x)
        self.layer_9 = nn.AvgPool1d(x.shape[2])
        x = self.layer_9(x)
        x = self.layer_10(x)
        y = self.layer_11(x.view(x.shape[0],x.shape[1]))
        y = self.activation_softmax(y)
        return y
    
    def calculate_accuracy(self, X,y):
        with torch.no_grad():
            output = model.forward(X.float())
            max_index = output.max(dim = 1)[1]
            true_output = y.type(torch.LongTensor)
            result =  (max_index == true_output).sum()/y.shape[0]
        return result.detach().numpy()
    
    def calculate_loss(self, X,y, crit):
        with torch.no_grad():
            output = model.forward(X.float())
            max_index = output.max(dim = 1)[1]
            true_output = y.type(torch.LongTensor)
            
        return crit(output, true_output).item()

Here is the dataset class:

class IMU_dataset(Dataset):

    def __init__(self):

        
        self.n = X.shape[0]
        self.x_data = torch.from_numpy(X.reshape(-1,3,400)) 
        self.y_data = torch.from_numpy(y) 

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n

ptrblck · August 30, 2021, 5:03am

nn.CrossEntropyLoss expects raw logits as the model output, so you would have to remove the nn.Softmax as the last activation from your model.

Prakyath_Kantharaju · August 30, 2021, 5:17am

Thank you for your reply, @ptrblck

I have updated that, Here is my accuracy and loss (1st column is accuracy and 2nd is loss) Still no change

0.3315678 1.4387224912643433
0.3506356 1.3252450227737427
0.35911018 1.3104709386825562
0.375 1.3027973175048828
0.34957626 1.3020350933074951
0.34957626 1.301633358001709
0.3516949 1.2986819744110107
0.34957626 1.298031210899353
0.34957626 1.297572374343872
0.3516949 1.303324580192566
0.3516949 1.2982518672943115
0.34957626 1.2960001230239868
0.3516949 1.300429105758667
0.34957626 1.3036184310913086
0.3516949 1.2957249879837036
0.34957626 1.2951198816299438
0.3516949 1.303584337234497
0.3516949 1.295042634010315
0.34957626 1.298003911972046
0.3516949 1.2967694997787476
0.3516949 1.3020329475402832
0.3516949 1.2952202558517456
0.3516949 1.2989294528961182
0.3516949 1.2989754676818848
0.3516949 1.2943629026412964
0.3516949 1.2956234216690063
0.3516949 1.2941797971725464
0.3516949 1.294808268547058
0.3516949 1.2965195178985596
0.3516949 1.2945359945297241
0.3516949 1.2992956638336182
0.34957626 1.3036137819290161