No accuracy on the validation set

I’m building a CNN-LSTM for classification. It gives me pretty good accuracy during training, but pretty bad accuracy in validation and testing.

class EncoderCNN(nn.Module):
    """CNN encoder: a 1->3 channel conv followed by a pretrained ResNet-152 trunk.

    The ResNet's final fully connected layer is dropped and replaced by a
    linear projection to ``embed_size`` followed by batch normalization.
    """

    def __init__(self, embed_size):
        """Load the pretrained ResNet-152 and replace top fc layer.

        Args:
            embed_size: Size of the feature vector produced per image.
        """
        super(EncoderCNN, self).__init__()
        # Maps single-channel input to the 3 channels the ResNet stem expects.
        # NOTE(review): any arguments after ``padding`` were lost in the
        # original listing; the call is closed here with library defaults.
        self.conv1 = nn.Conv2d(1, 3, kernel_size=2, stride=2, padding=3)
        resnet = models.resnet152(pretrained=True)
        modules = list(resnet.children())[:-1]      # delete the last fc layer.
        self.resnet = nn.Sequential(*modules)
        self.linear = nn.Linear(resnet.fc.in_features, embed_size)
        self.bn = nn.BatchNorm1d(embed_size, momentum=0.01)

    def forward(self, images):
        """Extract feature vectors from input images.

        Args:
            images: 4-D tensor unpacked as (C, batch_size, H, W).

        Returns:
            Tensor with one ``embed_size`` feature vector per image.
        """
        # permute actually swaps the channel and batch axes; view() would only
        # reinterpret the memory layout, which is correct only when C == 1.
        images = images.permute(1, 0, 2, 3)
        images = self.conv1(images)
        # NOTE(review): because the backbone runs under no_grad, no gradient
        # reaches conv1, so conv1 keeps its initial weights during training.
        # NOTE(review): in train mode the backbone's BatchNorm layers use batch
        # statistics while eval mode uses running statistics; call
        # model.eval() before validation/testing.
        with torch.no_grad():
            features = self.resnet(images)
        features = features.reshape(features.size(0), -1)
        features = self.bn(self.linear(features))
        return features

class DecoderRNN(nn.Module):
    """LSTM decoder that maps image features (and captions) to vocabulary scores."""

    def __init__(self, embed_size, hidden_size, vocab_size, num_layers, max_seq_length=20):
        """Set the hyper-parameters and build the layers.

        Args:
            embed_size: Dimension of the token embeddings and image features.
            hidden_size: Hidden state size of the LSTM.
            vocab_size: Number of output classes / tokens.
            num_layers: Number of stacked LSTM layers.
            max_seq_length: Stored on the instance; not used by the methods
                in this class.
        """
        super(DecoderRNN, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, vocab_size)
        # Attribute name (with its "seg" spelling) is kept for existing callers.
        self.max_seg_length = max_seq_length

    def forward(self, features, captions, lengths):
        """Decode image feature vectors and generates captions.

        Args:
            features: (batch_size, embed_size) image features.
            captions: (batch_size, seq_len) token indices.
            lengths: Per-sample sequence lengths (counting the prepended
                feature step), sorted in decreasing order as required by
                ``pack_padded_sequence``.

        Returns:
            (sum(lengths), vocab_size) scores for every packed time step.
        """
        embeddings = self.embed(captions)
        # Prepend the image feature as the first time step of each sequence.
        embeddings = torch.cat((features.unsqueeze(1), embeddings), 1)
        packed = pack_padded_sequence(embeddings, lengths, batch_first=True)
        hiddens, _ = self.lstm(packed)
        # hiddens is a PackedSequence here; .data holds the packed outputs.
        outputs = self.linear(hiddens.data)
        return outputs

    def sample(self, features, states=None):
        """Score the vocabulary for given image features in a single LSTM step.

        Args:
            features: (batch_size, embed_size) image features.
            states: Optional initial LSTM (hidden, cell) state.

        Returns:
            (batch_size, vocab_size) scores, one row per input sample.
        """
        inputs = features.unsqueeze(1)
        hiddens, states = self.lstm(inputs, states)          # hiddens: (batch_size, 1, hidden_size)
        # Drop the length-1 time axis. Indexing with [0] here would instead
        # pick only the first sample of the batch, because hiddens is a plain
        # batch-first tensor rather than a PackedSequence.
        outputs = self.linear(hiddens.squeeze(1))            # outputs:  (batch_size, vocab_size)

        return outputs

I’m using the `sample` method for testing. If anyone has any ideas about what’s wrong here, please tell me.