Model does not learn at all

Hello, could someone please help me? I am quite new to PyTorch, although I have programmed several neural nets for biological applications. However, I seem to be making the same error over and over again, because I tried several more complicated networks and they did not learn at all. I have now made quite a simple one for the MNIST task and I am still unable to find my mistake. Could someone please have a look at the code? Thanks!

import torch
import torch as tc
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch.autograd import Variable
import torch.autograd
import torchvision.datasets as datasets
import torch.optim as optim

# Input pipeline: MNIST images are converted to tensors so the DataLoader can
# batch them. Normalisation is intentionally left switched off.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    # normalize
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    # normalize
])

# Downloads the dataset into ./MNIST on first use.
MNIST_train = datasets.MNIST(root='MNIST', train=True, download=True, transform=transform_train)
MNIST_test = datasets.MNIST(root='MNIST', train=False, download=True, transform=transform_test)

# NOTE(review): shuffle=False on the training loader means every epoch visits
# the batches in the same order; shuffle=True is the usual choice for SGD.
trainloader = torch.utils.data.DataLoader(MNIST_train, batch_size=256, shuffle=False)
testloader = torch.utils.data.DataLoader(MNIST_test, batch_size=256, shuffle=False)

class Block(nn.Module):
    """Stack of three 3x3 convolutions followed by a single leaky ReLU.

    The convolutions are applied back to back with no activation in between
    and no padding, so each one trims two pixels from height and width.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by every convolution.
    """

    def __init__(self, in_channels, out_channels):
        # nn.Module must be initialised before submodules can be assigned;
        # without this call the assignment to self.layers raises AttributeError.
        super().__init__()

        self.layers = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3),
            nn.Conv2d(out_channels, out_channels, kernel_size=3),
            nn.Conv2d(out_channels, out_channels, kernel_size=3),
        )

    def forward(self, x):
        """Apply the convolution stack, then one leaky ReLU on its output."""
        x = F.leaky_relu(self.layers(x))
        return x

# NOTE(review): only the constructor signature of Encoder is visible here; its
# body is missing from this listing. Netz assembles its encoder from Block
# directly and does not reference this class.
class Encoder(nn.Module):
    def __init__(self, in_channels, out_channels):

class Decoder(nn.Module):
    """Classification head: global average pooling followed by two linear layers.

    Args:
        in_features: number of channels of the incoming feature map, which is
            also the width of the first linear layer.
        n_classes: number of output logits.
    """

    def __init__(self, in_features, n_classes):
        # nn.Module must be initialised before submodules can be assigned;
        # without this call the assignment to self.avg raises AttributeError.
        super().__init__()
        # Collapse every channel to a single value regardless of spatial size.
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.decoder = nn.Sequential(
            nn.Linear(in_features, in_features),
            nn.Linear(in_features, n_classes))

    def forward(self, x):
        """Return one row of ``n_classes`` logits per sample in ``x``."""
        x = self.avg(x)
        # Flatten (batch, channels, 1, 1) to (batch, channels) for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.decoder(x)
        return x

class Netz(nn.Module):
    """Small image classifier: one convolutional Block feeding a Decoder head.

    Args:
        in_channels: number of channels of the input images.
        num_classes: number of output logits.
    """

    def __init__(self, in_channels, num_classes):
        # nn.Module must be initialised before submodules can be assigned.
        super().__init__()
        # Use the caller's in_channels instead of a hard-coded 1 so the
        # argument is actually honoured; Netz(1, ...) builds the same model.
        self.encoder = nn.Sequential(
            Block(in_channels, 32),
        )
        # The Block emits 32 channels, which is what the Decoder pools over.
        self.decoder = Decoder(32, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# Prefer the GPU when one is available, otherwise stay on the CPU.
device = tc.device('cuda' if tc.cuda.is_available() else 'cpu')

# One input channel, ten output classes. The model is moved to the chosen
# device before the optimizer is created so the optimizer tracks the
# parameters that are actually trained.
netz = Netz(1, 10).to(device)

optimizer = optim.SGD(netz.parameters(), lr=0.001, momentum=0.5)
# cross_entropy takes raw logits and integer class targets.
criterion = F.cross_entropy

def train():
    """Run one pass over ``trainloader`` and return the training accuracy.

    Uses the module-level ``netz``, ``optimizer``, ``criterion`` and
    ``trainloader``.

    Returns:
        Fraction of training samples classified correctly during this pass
        (measured while the weights are still being updated).
    """
    netz.train()
    # Follow whatever device the model lives on so inputs always match it.
    model_device = next(netz.parameters()).device

    correct = 0
    for data, target in trainloader:
        data = data.to(model_device)
        target = target.to(model_device)

        # NOTE(review): the gradient reset, backward pass and optimizer step
        # are required for any learning to happen; confirm they match the
        # intended training procedure.
        optimizer.zero_grad()
        out = netz(data)
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()

        # Index of the largest logit is the predicted class.
        prediction = out.detach().max(1, keepdim=True)[1]
        correct += prediction.eq(target.view_as(prediction)).sum().item()

    precision = correct / len(trainloader.dataset)
    return precision

# Train for 100 epochs; each train() call is one full pass over trainloader.
for _ in range(100):
    train()

I’m not sure exactly where your model is going off, but you can find a more standard approach for working with MNIST in the official PyTorch MNIST example, if that is helpful for your use case. One thing that looks interesting to me in your model is that you don’t have activations between the convolutional layers in your Blocks, but I don’t know if that alone would cause it to not learn anything.

Thanks a lot, I will look into that. I changed the optimizer to Adam and ran it for more epochs, and in the end it converged. But yes, I forgot some activation functions; I will fix that, and I guess it will improve things.