I am trying to predict opioid prescription overdoses using a dataset I found on Kaggle.
After cleaning the data, I am left with 241 features and 19,807 rows in my training set. I have loaded train_data and test_data into DataLoaders and passed them to my RNN. My loss is decreasing, but my accuracy stays exactly the same. What am I doing wrong?
Loading data into DataLoaders
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

## train data
class TrainData(Dataset):
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        return len(self.X_data)

train_data = TrainData(torch.FloatTensor(X_train),
                       torch.FloatTensor(y_train))
## test data
class TestData(Dataset):
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        return len(self.X_data)

test_data = TestData(torch.FloatTensor(X_test),
                     torch.FloatTensor(y_test))

train_loader = DataLoader(dataset=train_data, batch_size=100, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=1)
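For reference, here is a quick sanity check on the shapes coming out of train_loader (assuming X_train is a (19807, 241) array and y_train a 1-D array of 0/1 labels):

# grab one batch and inspect its shapes
xb, yb = next(iter(train_loader))
print(xb.shape)  # torch.Size([100, 241]) -- flat feature vectors, no sequence dimension yet
print(yb.shape)  # torch.Size([100])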
RNN Architecture
class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNN, self).__init__()
        # Defining parameters
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # Defining layers
        # RNN layer
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim,
                          batch_first=True, nonlinearity='relu')
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        batch_size = x.size(0)
        # Initialize the hidden state for the first input using the method defined below
        hidden = self.init_hidden(batch_size).requires_grad_()
        # Pass the input and hidden state through the model and get the output
        out, hidden = self.rnn(x, hidden.detach())
        # Reshape the output so it can be fed to the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        return out

    def init_hidden(self, batch_size):
        # Generate the first hidden state of zeros to be propagated in the forward pass
        # (send the tensor holding the hidden state to the device specified earlier)
        hidden = torch.zeros(self.layer_dim, batch_size, self.hidden_dim)
        return hidden
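For reference, with batch_first=True, nn.RNN expects input of shape (batch, seq_len, input_dim) and an initial hidden state of shape (layer_dim, batch, hidden_dim). A minimal shape check on a standalone layer (my own sketch, with a hypothetical batch of 4):

rnn = nn.RNN(241, 100, 2, batch_first=True, nonlinearity='relu')
x = torch.zeros(4, 1, 241)   # (batch, seq_len, input_dim)
h0 = torch.zeros(2, 4, 100)  # (layer_dim, batch, hidden_dim)
out, hn = rnn(x, h0)
print(out.shape)  # torch.Size([4, 1, 100])
print(hn.shape)   # torch.Size([2, 4, 100])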
input_dim = 241
hidden_dim = 100
layer_dim = 2
output_dim = 1

model = RNN(input_dim=input_dim,
            hidden_dim=hidden_dim,
            layer_dim=layer_dim,
            output_dim=output_dim)
print(model)
# model.to(device)
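To see what the full model returns, here is a quick check with a dummy batch (my own sketch; the batch of 4 is arbitrary):

dummy = torch.zeros(4, 1, 241)  # (batch, seq_len, input_dim)
out = model(dummy)
print(out.shape)  # torch.Size([4, 1]) -- one raw, pre-sigmoid score per sample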
n_epochs = 100
lr = 0.01

# Define loss and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
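As I understand it, nn.BCELoss expects probabilities in [0, 1], which is why I apply torch.sigmoid to the model output before computing the loss. This quick check (my own, not part of the pipeline) confirms that it matches nn.BCEWithLogitsLoss applied to the raw outputs:

logits = torch.randn(4, 1)
targets = torch.randint(0, 2, (4, 1)).float()
manual = nn.BCELoss()(torch.sigmoid(logits), targets)
fused = nn.BCEWithLogitsLoss()(logits, targets)
print(torch.allclose(manual, fused))  # True, up to floating-point error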
My training function:
iter = 0
for epoch in range(n_epochs):
    for i, (data, labels) in enumerate(train_loader):
        model.train()
        # Reshape each flat batch to (batch_size, seq_len=1, input_dim);
        # using data.size(0) also handles the final partial batch of 7 rows
        data = data.view(data.size(0), 1, 241).requires_grad_()

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get the outputs, then squash them to probabilities
        outputs = model(data)
        outputs = torch.sigmoid(outputs)

        # Match the (batch_size, 1) shape of the outputs
        labels = labels.unsqueeze(1)
        loss = criterion(outputs, labels)

        # Get gradients w.r.t. parameters
        loss.backward()

        # Update parameters
        optimizer.step()

        iter += 1
        if iter % 500 == 0:
            model.eval()
            # Calculate accuracy on the test set
            correct = 0
            total = 0
            for data, labels in test_loader:
                # Reshape to (1, seq_len=1, input_dim)
                data = data.view(1, 1, 241)
                # Forward pass only, to get the outputs
                outputs = model(data)
                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)
                # Total number of labels
                total += labels.size(0)
                # Total correct predictions
                correct += (predicted == labels).sum()
            accuracy = 100 * correct / total

            # Print loss and accuracy
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))
Output:
Iteration: 500. Loss: 0.20433291792869568. Accuracy: 41.11470031738281
Iteration: 1000. Loss: 0.2545611560344696. Accuracy: 41.11470031738281
Iteration: 1500. Loss: 0.2528284788131714. Accuracy: 41.11470031738281
Iteration: 2000. Loss: 0.3239484131336212. Accuracy: 41.11470031738281
Iteration: 2500. Loss: 0.3239382803440094. Accuracy: 41.11470031738281
Iteration: 3000. Loss: 0.2597212493419647. Accuracy: 41.11470031738281
Iteration: 3500. Loss: 0.4951004087924957. Accuracy: 41.11470031738281
Iteration: 4000. Loss: 0.3577985465526581. Accuracy: 41.11470031738281
Iteration: 4500. Loss: 0.3011106252670288. Accuracy: 41.11470031738281
Iteration: 5000. Loss: 0.3104074001312256. Accuracy: 41.11470031738281
Iteration: 5500. Loss: 0.2941060960292816. Accuracy: 41.11470031738281
Iteration: 6000. Loss: 0.21687862277030945. Accuracy: 41.11470031738281
Iteration: 6500. Loss: 0.36724144220352173. Accuracy: 41.11470031738281
Iteration: 7000. Loss: 0.16310080885887146. Accuracy: 41.11470031738281
Iteration: 7500. Loss: 0.2854510545730591. Accuracy: 41.11470031738281
Iteration: 8000. Loss: 0.36213111877441406. Accuracy: 41.11470031738281
Iteration: 8500. Loss: 0.44782283902168274. Accuracy: 41.11470031738281
Iteration: 9000. Loss: 0.2636875808238983. Accuracy: 41.11470031738281
Iteration: 9500. Loss: 0.2781040668487549. Accuracy: 41.11470031738281
Please let me know where I am going wrong. Thank you!