CNN+RNN batch validation produces undesired output


I have a classification task. The input data is a sequence of 3x256x256 images. I want to pass the output of a CNN into an RNN and then, at the last timestep, obtain some class probabilities.
Here is my model:

class Model(nn.Module):
    """CNN feature extractor (AlexNet trunk) feeding a two-layer GRU-cell stack.

    Intended usage: call ``forward`` once per timestep, threading the returned
    hidden-state tuple back in; the classifier output at the final timestep
    gives the per-sequence predictions.

    Args:
        hidden_size: width of the second GRU cell's hidden state.
        output_size: number of output classes/logits.
    """

    def __init__(self, hidden_size, output_size):
        super(Model, self).__init__()

        # Hyperparameters
        self.hidden_size = hidden_size
        self.output_size = output_size
        # Flattened size of AlexNet's conv output — assumes a 256x256 input
        # yields a 256x7x7 feature map; TODO confirm for other input sizes.
        self.feature_size = 256 * 7 * 7
        self.dropout_ratio = 0.5  # NOTE(review): declared but never applied anywhere

        # Learnable initial hidden states for the two GRU cells.
        self.initial_hidden1 = nn.Parameter(torch.zeros(512))
        self.initial_hidden2 = nn.Parameter(torch.zeros(hidden_size))

        # Pretrained AlexNet convolutional trunk as the per-frame feature extractor.
        alex = alexnet(pretrained=True)
        self.feature_extractor = alex.features

        self.hidden1 = nn.GRUCell(self.feature_size, 512)
        self.hidden2 = nn.GRUCell(512, hidden_size)

        partial_hidden = round(hidden_size / 2)
        # BUG FIX: the original code was missing the closing parenthesis of
        # nn.Sequential(...), which is a syntax error.
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size, partial_hidden),
            nn.Linear(partial_hidden, output_size),
        )

    def forward(self, input, hidden=None):
        """Process one timestep.

        Args:
            input: batch of frames, shape (batch, 3, H, W).
            hidden: tuple (h1, h2) from the previous timestep, or None to
                start from the learnable initial states.

        Returns:
            (output, (h1, h2)) — classifier output for this timestep and the
            new hidden states to feed into the next call.
        """
        if hidden is None:
            hidden = self.new_hidden(input.size()[0])

        features = self.feature_extractor(input)
        features = features.view(-1, self.feature_size)
        hidden1 = self.hidden1(features, hidden[0])
        hidden2 = self.hidden2(hidden1, hidden[1])
        output = self.classifier(hidden2)

        # BUG FIX: the original line `if is False:` is a syntax error.
        # Presumably the intent was to squash logits only outside training
        # (e.g. when the training loss is BCEWithLogitsLoss) — TODO confirm.
        if not self.training:
            output = torch.sigmoid(output)

        return output, (hidden1, hidden2)

    def new_hidden(self, batch=None):
        """Return the learnable initial hidden states.

        If ``batch`` is given, each state is tiled to (batch, size); otherwise
        the raw 1-D parameters are returned.
        """
        hidden1 = self.initial_hidden1
        if batch is not None:
            hidden1 = hidden1.repeat(batch, 1)

        hidden2 = self.initial_hidden2
        if batch is not None:
            hidden2 = hidden2.repeat(batch, 1)

        return hidden1, hidden2

The way I train is

batch_size = 16
seq_length = 30

for epoch in range(epochs):
    for i, data in enumerate(train_dataloader, 0):

        # images.shape == [16, 30, 3, 256, 256]
        # labels.shape == [16, 39]
        images, labels = data
        # Variable() is deprecated since PyTorch 0.4 — use tensors directly.
        images, labels = images.cuda(), labels.cuda()

        # Unroll the sequence one frame at a time, threading the hidden state.
        hidden = None
        for j in range(images.size(1)):
            output, hidden = model(images[:, j], hidden)

        # output.shape == [16, 39] — loss is taken on the final timestep only.
        loss = criterion(output, labels)

        # BUG FIX: the original loop computed the loss but never updated the
        # weights, so the evaluated model never changes (which matches the
        # reported symptom that editing the conv part had no effect).
        # NOTE(review): `optimizer` must be created next to `criterion`.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

After each epoch, accuracy is measured on test data:


# Switch to inference mode: model.eval() disables train-only behaviour
# (e.g. dropout), and torch.no_grad() replaces the deprecated
# `volatile=True` / Variable API for gradient-free evaluation.
model.eval()
with torch.no_grad():
    for i, data in enumerate(loader, 0):
        images, labels = data
        images, labels = images.cuda(), labels.cuda()
        hidden = None

        for j in range(images.size(1)):
            output, hidden = model(images[:, j], hidden)
# Restore training mode for the next epoch.
model.train()

However, I get exactly the same output for every item in the batch (each sequence receives identical prediction values). This seems strange, because in the training loop the outputs do differ across the sequences in a batch. Also, while the training loss keeps changing as training progresses, the evaluation output does not.
I noticed that even when I changed the convolutional part of my model, nothing changed.
What am I doing wrong?
Thank you in advance.