Hi,
I have array of rgb images with shape is (100, 3, 256, 256 ) (batch size,C,W,H) with shape of labels like (100, 7) (Batch size, labels parameters). labels are hot vector. but I am getting error like “Expected input batch_size (300) to match target batch_size (100), when it tries to calculate the cross entropy loss. would you please tell me what should i change?
class RNNGRO(nn.Module):
def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
super(RNNGRO, self).__init__()
self.num_layers = num_layers
self.hidden_dim = hidden_dim
#self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
57# -> x needs to be: (batch_size, seq, input_size)
# or:
#self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
self.fc = nn.Linear(hidden_dim, output_dim)
def forward(self, x):
# Set initial hidden states (and cell states for LSTM)
#print("input value:",x.shape)
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(device)
c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(device)
#print("init value h0:",h0.shape)
#print("init value c0:",c0.shape)
# Forward propagate RNN
#out, _ = self.rnn(x, h0)
# or:
out, _ = self.lstm(x, (h0,c0))
#print("value ltsm exit:",out.shape)
# out: tensor of shape (batch_size, seq_length, hidden_size)
# out: (n, 256, 256)
# Decode the hidden state of the last time step
out = out[:, -1, :]
#print("out for decode:",out.shape)
# out: (n, 256)
out = self.fc(out)
#print("out for linear:",out.shape)
# out: (n, 7) parameters
return out
parameters to LSTM
BATCH_SIZE = 300
INPUT_SIZE = 256
SEQUENCE_LENGHT = 256
HIDDEN_SIZE = 100
NUM_NEURONS = 4
CHANNELS = 3
NUM_CLASSES = 7
LEARNING_RATE = 0.001
NUM_EPOCHS = 100
# Model instance
model = RNNGRO(INPUT_SIZE, HIDDEN_SIZE,
NUM_NEURONS, NUM_CLASSES).to(device)
#criterion
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE,momentum=0.9)
Training
n_total_steps = len(train_loader)
since = time.time()
for epoch in range(NUM_EPOCHS):
for i, (images, labels) in enumerate(train_loader):
# origin shape: [N, 4, 256, 256] //aqui tengo el problema
# resized: [N, 256, 256]
#print("image moment", images.shape)
images = images.reshape(-1,SEQUENCE_LENGHT,INPUT_SIZE).to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images).to(device)
print(outputs.shape)
print(labels.shape)
print(labels.float().view(-1,7).shape)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (i+1) % 100 == 0:
print(
f'Epoch [{epoch+1}/{NUM_EPOCHS}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
The print outputs.shape: torch.Size([900, 7])
The print labels.shape: torch.Size([300, 7])