Hi,
I'm working with an RNN for image classification. The problem, I assume, is that my loaded images have 3 RGB channels; if I only had one channel, I guess it would work. Because of the 3 channels, the batch dimension of the input ends up 3 times larger than the number of targets. Any ideas how I can solve this?
I also checked this post, which already got me this far, but now I'm stuck on the 3 channels.
The input shape is torch.Size([64, 3, 224, 224]), and X after the permute is torch.Size([224, 192, 224]); the 192 comes from the 64 images being merged with their 3 channels (64 × 3 = 192).
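Here is a minimal repro of how the channels end up in the batch dimension (a dummy tensor with the same shapes as my data):

    import torch

    x = torch.randn(64, 3, 224, 224)   # same shape as my input batch
    x = x.view(-1, 224, 224)           # what my training loop does
    print(x.shape)                     # torch.Size([192, 224, 224]) -> 64 * 3 = 192
    x = x.permute(1, 0, 2)             # what forward() does
    print(x.shape)                     # torch.Size([224, 192, 224])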
My parameters:
- BATCH_SIZE = 64
- N_STEPS = 28
- N_INPUTS = 224
- N_CHANNELS = 3
- N_NEURONS = 150
- N_OUTPUTS = 21
- N_EPOCHS = 5
- N_PIXELS = 224
    import torch
    import torch.nn as nn

    class ImageRNN(nn.Module):
        def __init__(self, batch_size, n_steps, n_inputs, n_neurons, n_outputs):
            super(ImageRNN, self).__init__()
            self.n_neurons = n_neurons
            self.batch_size = batch_size
            self.n_steps = n_steps
            self.n_inputs = n_inputs
            self.n_outputs = n_outputs
            self.basic_rnn = nn.RNN(self.n_inputs, self.n_neurons)
            self.FC = nn.Linear(self.n_neurons, self.n_outputs)

        def init_hidden(self):
            # (num_layers, batch_size, n_neurons)
            return torch.zeros(1, self.batch_size, self.n_neurons)

        def forward(self, X):
            # transform X to dimensions: n_steps x batch_size x n_inputs
            print(X.shape)              # debug: e.g. torch.Size([192, 224, 224])
            X = X.permute(1, 0, 2)
            print(X.shape)              # debug: e.g. torch.Size([224, 192, 224])
            self.batch_size = X.size(1)
            self.hidden = self.init_hidden()
            rnn_out, self.hidden = self.basic_rnn(X, self.hidden)
            out = self.FC(self.hidden)
            return out.view(-1, self.n_outputs)  # batch_size x n_outputs
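If I feed this model a single-channel batch, the shapes come out right, which is why I think the channels are the problem. A quick sanity check with dummy data:

    model = ImageRNN(BATCH_SIZE, N_STEPS, N_INPUTS, N_NEURONS, N_OUTPUTS)
    x = torch.randn(64, 224, 224)      # pretend single-channel images: [batch, height, width]
    out = model(x)
    print(out.shape)                   # torch.Size([64, 21]) -> matches the 64 labels

For reference, my training loop: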
    for i, data in enumerate(trainloader):
        # zero the parameter gradients
        optimizer.zero_grad()

        # reset hidden states
        model.hidden = model.init_hidden()

        # get the inputs
        inputs, labels = data                         # inputs: [64, 3, 224, 224], labels: [64]
        inputs = inputs.view(-1, N_PIXELS, N_PIXELS)  # -> [192, 224, 224]: channels merged into the batch dim

        # forward + backward + optimize
        outputs = model(inputs)                       # -> [192, 21], but there are only 64 labels
        loss = criterion(outputs, labels)             # this is where the 192 vs. 64 mismatch bites
        loss.backward()
        optimizer.step()
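One idea I'm considering (not sure it's the right approach, so treat this as an untested sketch): instead of letting the channels spill into the batch dimension, fold them into the feature dimension, so each of the 224 image rows becomes one step with 3 × 224 = 672 features and the batch stays at 64. That would mean building the model with n_inputs = N_CHANNELS * N_PIXELS and reshaping like this:

    N_INPUTS = N_CHANNELS * N_PIXELS                       # 3 * 224 = 672
    model = ImageRNN(BATCH_SIZE, N_STEPS, N_INPUTS, N_NEURONS, N_OUTPUTS)

    inputs, labels = data                                  # inputs: [64, 3, 224, 224]
    inputs = inputs.permute(0, 2, 1, 3)                    # -> [64, 224, 3, 224] (batch, height, channels, width)
    inputs = inputs.reshape(inputs.size(0), N_PIXELS, -1)  # -> [64, 224, 672]
    outputs = model(inputs)                                # -> [64, 21], matching the 64 labels

Would that be a reasonable way to handle the 3 channels, or is there a better pattern for RGB input to an RNN?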