Making an RNN for image classification

Hi guys,

I have an array of RGB images with shape (3000, 3, 96, 96) and labels with shape (3000, 4); the labels are one-hot vectors. I am getting the error “Expected input batch_size (150) to match target batch_size (50)” when the loss is calculated. Could you please tell me what I should change?

network:

import torch
import torch.nn as nn

class RNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim

        # Number of hidden layers
        self.layer_dim = layer_dim

        # Building your RNN
        # batch_first=True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, input_dim)
        # batch_dim = number of samples per batch
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='relu')

        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Initialize hidden state with zeros
        # (layer_dim, batch_size, hidden_dim)
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()

        # Detach the hidden state so gradients do not propagate through its
        # history (truncated backpropagation through time, BPTT)
        out, hn = self.rnn(x, h0.detach())

        # Use only the last time step's output for classification
        out = self.fc(out[:, -1, :])
        return out

with parameters:

input_dim = 96
hidden_dim = 100
layer_dim = 1
output_dim = 4
model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  
print(len(list(model.parameters())))
print(list(model.parameters())[0].size())  # Input --> Hidden weight (A1): torch.Size([100, 96])
print(list(model.parameters())[2].size())  # Input --> Hidden bias (B1): torch.Size([100])
print(list(model.parameters())[1].size())  # Hidden --> Hidden weight (A3): torch.Size([100, 100])
print(list(model.parameters())[3].size())  # Hidden --> Hidden bias (B3): torch.Size([100])
print(list(model.parameters())[4].size())  # Hidden --> Output weight (A2): torch.Size([4, 100])
print(list(model.parameters())[5].size())  # Hidden --> Output bias (B2): torch.Size([4])

with train loop of:

# Number of steps to unroll
seq_dim = 96  
num_epochs = 5
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader_fo):
        model.train()
        # Cast to float and reshape to (batch, seq, feature);
        # requires_grad_() enables gradient tracking on the inputs
        images = images.float()
        images = images.view(-1, seq_dim, input_dim).requires_grad_()

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        # outputs.size() --> (batch_size, output_dim)
        outputs = model(images)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iter += 1

        if iter % 500 == 0:
            model.eval()
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in vali_loader_fo:
                # Reshape images the same way as in training (no gradients needed here)
                images = images.float().view(-1, seq_dim, input_dim)

                # Forward pass only to get logits/output
                outputs = model(images)

                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)

                # Total number of labels
                total += labels.size(0)

                # Total correct predictions
                correct += (predicted == labels).sum()

            accuracy = 100 * correct / total

            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

nn.CrossEntropyLoss won’t work with one-hot encoded targets; it expects a LongTensor of shape [batch_size] containing the class indices.
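If your labels are one-hot, you could convert them to class indices with argmax before calling the criterion. A minimal sketch, assuming labels is a one-hot tensor of shape [batch_size, num_classes]:

# Sketch: convert one-hot targets to the class-index format nn.CrossEntropyLoss expects
# (assumes `labels` is one-hot with shape [batch_size, num_classes])
targets = labels.argmax(dim=1)  # LongTensor of shape [batch_size]
loss = criterion(outputs, targets)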

However, since this is not the cause of the current error, could you please print the output and target shapes right before calculating the loss?
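Something like this right before the criterion call should show where the shapes diverge:

# Debugging sketch: inspect the shapes that reach the loss
print(outputs.shape)  # should be [batch_size, num_classes]
print(labels.shape)   # should be [batch_size] (class indices)
loss = criterion(outputs, labels)

My guess is that images.view(-1, seq_dim, input_dim) folds the channel dimension into the batch: a [50, 3, 96, 96] batch viewed as (-1, 96, 96) yields 150 sequences, which would explain the mismatch between 150 and 50.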

Hi, my name is Nicolas and I have a similar problem. In my case the RNN output tensor is torch.Size([300, 7]) and the labels are torch.Size([100, 7]).

Answered here.