ZeroDivison Error After Training

Here’s my model

# functions to show an image

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, 5, 1, 2)
        self.filt1 = nn.Conv2d(1, 1, 2, 1, padding="valid")
        self.filt2 = nn.Conv2d(1, 1, 2, 1, padding="valid")
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(192, 128, 5, 1, 2)
        self.conv3 = nn.Conv2d(384, 128, 5, 1, 2)
        self.conv4 = nn.Conv2d(384, 128, 5, 1, 2)
        self.conv5 = nn.Conv2d(384, 128, 5, 1, 2)
        self.fc1 = nn.Linear(128 * 64, 8)
        self.fc2 = nn.Linear(8, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 16)
        self.fc5 = nn.Linear(16, 100)

    def forward(self, x):
        # First CNN LAYER
        y = self.pool(F.relu(self.conv1(x)))
        d = y.view(-1, 1, y.shape[2], y.shape[3])
        x = self.filt2(d)
        z = F.pad(x, (0, 1, 0, 1))
        x = self.filt1(d)
        x = F.pad(
            x, (0, 1, 0, 1)
        )  (128,64*3,x.shape[2],x.shape[3])
        x = x.view(4, -1, x.shape[2], x.shape[3])
        z = z.view(4, -1, z.shape[2], z.shape[3])
        x = torch.cat((x, y, z), dim=1)
        # Second CNN LAYER
        y = F.relu(self.conv2(x))
        d = y.view(-1, 1, y.shape[2], y.shape[3])
        x = self.filt2(d)
        z = F.pad(x, (0, 1, 0, 1))
        x = self.filt1(d)
        x = F.pad(x, (0, 1, 0, 1))
        x = x.view(4, -1, x.shape[2], x.shape[3])
        z = z.view(4, -1, z.shape[2], z.shape[3])
        x = torch.cat((x, y, z), dim=1)
        # Third CNN LAYER
        y = F.relu(self.conv3(x))
        d = y.view(-1, 1, y.shape[2], y.shape[3])
        x = self.filt2(d)
        z = F.pad(x, (0, 1, 0, 1))
        x = self.filt1(d)
        x = F.pad(x, (0, 1, 0, 1))
        x = x.view(4, -1, x.shape[2], x.shape[3])
        z = z.view(4, -1, z.shape[2], z.shape[3])
        x = torch.cat((x, y, z), dim=1)
        # Fourth CNN LAYER
        y = self.pool(F.relu(self.conv4(x)))
        d = y.view(-1, 1, y.shape[2], y.shape[3])
        x = self.filt2(d)
        z = F.pad(x, (0, 1, 0, 1))
        x = self.filt1(d)
        x = F.pad(x, (0, 1, 0, 1))
        x = x.view(4, -1, x.shape[2], x.shape[3])
        z = z.view(4, -1, z.shape[2], z.shape[3])
        x = torch.cat((x, y, z), dim=1)
        # Fifth CNN LAYER
        x = F.relu(self.conv5(x))

        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x

net = Net().to(device)
with torch.no_grad():
    net.filt1.weight = nn.Parameter(torch.tensor([[[[1.0, 0.0], [0.0, -1.0]]]]))
    net.filt2.weight = nn.Parameter(torch.tensor([[[[0.0, 1.0], [-1.0, 0.0]]]]))
net = Net().to(device)


Here's my evaluation code

PATH = "./cifar_net.pth"
torch.save(net.state_dict(), PATH)

dataiter = iter(testloader)
images, labels = dataiter.next()
images, labels = images.to(device), labels.to(device)

# print images
print("GroundTruth: ", " ".join("%5s" % classes[labels[j]] for j in range(4)))

net = Net()
net = Net().to(device)
net.load_state_dict(torch.load(PATH))

outputs = net(images).to(device)

_, predicted = torch.max(outputs, 1)

print("Predicted: ", " ".join("%5s" % classes[predicted[j]] for j in range(4)))

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        
        outputs = net(images).to(device)
        
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(
    "Accuracy of the network on the 1000 test images: %d %%" % (100 * correct / total)
)

correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images).to(device)
        _, predictions = torch.max(outputs, 1)
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1
print(total_pred)
# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print("Accuracy for class {:5s} is: {:.1f} %".format(classname, accuracy))
GroundTruth:    cat  ship  ship plane
Predicted:   ship  ship  ship  ship
Accuracy of the network on the 1000 test images: 10 %
---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
/tmp/ipykernel_16208/3595331765.py in <module>
     56 # print accuracy for each class
     57 for classname, correct_count in correct_pred.items():
---> 58     accuracy = 100 * float(correct_count) / total_pred[classname]
     59     print("Accuracy for class {:5s} is: {:.1f} %".format(classname, accuracy))

ZeroDivisionError: float division by zero
1 Like

From these lines, it seems total_pred remained 0 for some class, thus leading to ZeroDivisionError probably.
You can add some print statements and check why this is the case?

Well actually by lowering the number of epochs that my training went It fixed itself on its own. Even though I don’t know what caused the first error to happen.I know what zero division is but not so sure how it came to be. After going for 25 epochs the loss went up from 0.3 to 2.5 that might be the case.