# A strange result from the softmax function in a CNN

When I pass an `image` through a CNN `net` via `net(image)`, I get two different answers from two different ways of calling it. I wonder if this is a bug.

```
result = net(image)

softmax_result1 = F.softmax(result, dim=1)
print(softmax_result1)
softmax_result2 = F.softmax(net(image), dim=1)
print(softmax_result2)
```

The result is:

```
tensor([[0.5803, 0.0120, 0.0462, 0.0072, 0.0107, 0.0356, 0.0964, 0.1650, 0.0118,
tensor([[0.4298, 0.1368, 0.0622, 0.0018, 0.0571, 0.0178, 0.0075, 0.2660, 0.0081,
```

However, this does not happen when I try the same thing with a network made only of linear layers.
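Something like this minimal linear-only check (the layer sizes here are chosen arbitrarily, just for illustration) prints the same softmax output twice:

```
import torch
import torch.nn as nn
import torch.nn.functional as F

torch.manual_seed(42)

# hypothetical linear-only network, just for the comparison
linear_net = nn.Sequential(
    nn.Linear(10, 32), nn.ReLU(),
    nn.Linear(32, 10),
)

x = torch.randn(1, 10)
print(F.softmax(linear_net(x), dim=1))
print(F.softmax(linear_net(x), dim=1))  # identical to the line above
```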

Here is the full code I used, where `image` is a 448×448 image (the file path in the code below is a placeholder):

```
import torch
import torch.nn as nn
import torch.nn.functional as F
import cv2

torch.manual_seed(42)

def initialize_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        nn.init.xavier_uniform_(m.weight)

def VGG_block(num_convs, in_channels, out_channels):
    blk = []
    for _ in range(num_convs):
        blk.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        blk.append(nn.ReLU())
        in_channels = out_channels
    blk.append(nn.MaxPool2d(kernel_size=2, stride=2))

    return nn.Sequential(*blk)

def VGG(conv_arch):
    conv_blks = []
    in_channels = 1
    for (num_convs, out_channels) in conv_arch:
        conv_blks.append(VGG_block(num_convs, in_channels, out_channels))
        in_channels = out_channels

    # five 2x max-pools halve 224 down to 7, hence out_channels * 7 * 7
    return nn.Sequential(
        *conv_blks, nn.Flatten(),
        nn.Linear(out_channels * 7 * 7, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 10)
    )

conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))
net = VGG(conv_arch)
net.apply(initialize_weights)

image = cv2.imread("image.png")  # placeholder path for the 448x448 input
image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
image = 255 - image
image = cv2.resize(image, (224, 224))
image = torch.tensor(image, dtype=torch.float32).reshape(1, 1, 224, 224)

result = net(image)

softmax_result1 = F.softmax(result, dim=1)
print(softmax_result1)
softmax_result2 = F.softmax(net(image), dim=1)
print(softmax_result2)
```

Hi Muling!

`Dropout` (when in training mode) pseudorandomly zeros out some elements
of the tensor passing through it, and does so differently on every forward call.

Try:

```
result = net(image)
softmax_result1 = F.softmax(result, dim=1)
print(softmax_result1)
softmax_result1 = F.softmax(result, dim=1)
print(softmax_result1)

softmax_result2 = F.softmax(net(image), dim=1)
print(softmax_result2)
softmax_result2 = F.softmax(net(image), dim=1)
print(softmax_result2)

net.eval()   # turns off Dropout

result = net(image)
softmax_result1 = F.softmax(result, dim=1)
print(softmax_result1)

softmax_result2 = F.softmax(net(image), dim=1)
print(softmax_result2)
```
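You can also see the same effect with a `Dropout` module in isolation; here is a minimal sketch (the input tensor is just an example):

```
import torch
import torch.nn as nn

torch.manual_seed(0)
drop = nn.Dropout(0.5)
x = torch.ones(1, 6)

drop.train()      # training mode: elements are zeroed at random
print(drop(x))    # differs from call to call
print(drop(x))

drop.eval()       # eval mode: Dropout is a no-op
print(drop(x))    # always equal to x
print(drop(x))
```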

Best.

K. Frank

You might try:

```
with torch.no_grad():
    net.eval()
    result = net(image)
    ...
```

That will put training-mode modules such as dropout and batchnorm into their inference behavior, and `torch.no_grad()` additionally disables gradient tracking.
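Putting it together, a minimal inference sketch, assuming the `net` and `image` from the question above (the variable names `probs1`/`probs2` are just illustrative):

```
import torch
import torch.nn.functional as F

net.eval()             # dropout becomes a no-op; batchnorm would use running stats
with torch.no_grad():  # no autograd bookkeeping during inference
    probs1 = F.softmax(net(image), dim=1)
    probs2 = F.softmax(net(image), dim=1)

# with Dropout disabled, the two forward passes now agree
print(torch.allclose(probs1, probs2))  # True
```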