Neural Network output does not change

Hello,
I've had a problem with my neural network for some time now.

It is supposed to differentiate between 4 different distances of a person to the camera, using two pictures aligned into one as input.

The neural network trains without any problems, but when I test it, it always predicts the same output neuron.

The code is the following:

import torch
import torchvision
from torchvision import transforms
from PIL import Image
from os import listdir
import random
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import os
import sys

normalize = transforms.Normalize(
	mean=[0.485 , 0.456 , 0.406],
	std=[0.229, 0.224, 0.225]
)

transform = transforms.Compose([
	transforms.Resize(512),
	transforms.CenterCrop(512),
	transforms.ToTensor(),
	normalize])

trainingdatadirectory = 'Testdata'
testdatadirectory = 'Trainingdata'
train_data_list = []
train_data= []
target_list = []
files = listdir(trainingdatadirectory)


learning = True
loadNN = False
learningrate = 0.00001
epochs = 50

if (learning == True):
	for i in range(len(listdir(trainingdatadirectory))): #insert trainingfile-directory
		f = random.choice(files)
		#print("\r"+f, end="")
		files.remove(f)
		img = Image.open(trainingdatadirectory + "/" + f) #insert trainingfile-directory
		img_tensor = transform(img)
		train_data_list.append(img_tensor)
		isnear = 1 if 'near' in f else 0
		ismiddle = 1 if 'middle' in f else 0
		iswide = 1 if 'long' in f else 0
		isnothing = 1 if 'none' in f else 0
		target = (isnear,ismiddle,iswide, isnothing)        #redo the target definition
		#print("\r File: " + f + " target: " + str(target), end="")
		target_list.append(target)
		if len(train_data_list) >= 64:
			#print(torch.stack(train_data_list).size())
			train_data.append((torch.stack(train_data_list), target_list))
			train_data_list = []
			target_list = []
			print("Loaded: " , len(train_data), ' of ', int(len(listdir(trainingdatadirectory))) / 64)


class Netz(nn.Module):
	def __init__(self):
		super(Netz, self).__init__()
		self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
		self.conv2 = nn.Conv2d(6, 10, kernel_size=5)
		self.conv3 = nn.Conv2d(10, 16, kernel_size=5)
		self.fc1 = nn.Linear(57600, 12500)
		self.fc2 = nn.Linear(12500, 1000)
		self.fc3 = nn.Linear(1000, 4)
		self.conv1_bn = nn.BatchNorm2d(6)
		self.conv2_bn = nn.BatchNorm2d(10)
		self.conv3_bn = nn.BatchNorm2d(16)

	def forward(self, x):
		x = self.conv1(x)
		x = F.max_pool2d(x, 2)
		x = self.conv1_bn(x)
		x = F.relu(x)
		x = self.conv2(x)
		x = F.max_pool2d(x, 2)
		x = self.conv2_bn(x)
		x = F.relu(x)
		x = self.conv3(x)
		x = F.max_pool2d(x, 2)
		x = self.conv3_bn(x)
		x = F.relu(x)
		x = x.view(-1, 57600)
		x = F.relu(self.fc1(x))
		x = F.relu(self.fc2(x))
		x = self.fc3(x)
		return torch.sigmoid(x)

model = Netz()
model.cuda()

#optimizer = optim.SGD(model.parameters(), lr=learningrate)
optimizer = optim.Adam(model.parameters(), lr=learningrate)

def train(epoch):
	model.train()
	batch_id = 0
	for data, target in train_data:
		target = torch.FloatTensor(target)
		data = Variable(data)
		data = data.cuda()
		target = Variable(target)
		target = target.cuda()
		optimizer.zero_grad()
		out = model(data)
		criterion = F.binary_cross_entropy
		loss = criterion(out, target)
		loss.backward()
		optimizer.step()
		batch_id = batch_id + 1
		print('Train Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_id * len(data) / 64, len(train_data), 100. * batch_id / len(train_data),  loss.item()))

def test():
	model.eval()
	files = listdir(testdatadirectory)
	f = random.choice(files)
	print(f)
	img = Image.open(testdatadirectory + '/' + f)
	img_eval_Tensor = transform(img)
	img_eval_Tensor.unsqueeze_(0)
	print(img_eval_Tensor.size())
	data = Variable(img_eval_Tensor.cuda())
	out = model(data)
	print(out.data.max(1, keepdim=True)[1])
	#img.show()
	#print(str(out))
	x = input('')

if(learning == True):
	if loadNN == True:
		model = torch.load('net.pt')
	for epoch in range(1,epochs):
		train(epoch)
		torch.save(model, 'net.pt')

if(learning == False):
	model = torch.load('net.pt')

while 1:
	test()

And here is a screenshot of the output, to show you what I mean:

I hope someone can help me.

...
		x = F.relu(self.fc2(x))
		x = self.fc3(x)
		return torch.sigmoid(x)

...
		criterion = F.binary_cross_entropy
		loss = criterion(out, target)

There might be other issues in the code, but one is the combination of activation and loss function: for a 4-class problem like yours, the loss should be computed on the logits (e.g. with F.cross_entropy), not on sigmoid probabilities. One way to fix that would be returning both logits and probabilities from your forward call:

    ...
    probas = F.softmax(logits, dim=1)
    return logits, probas
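
For the loss itself, you could then feed the logits (not the sigmoid/softmax outputs) into F.cross_entropy. Roughly like this, as an untested sketch; note that F.cross_entropy expects class indices as targets, so your one-hot tuples would need to be converted first (target_indices is just a placeholder name):

    logits, probas = model(data)
    target_indices = target.argmax(dim=1)           # one-hot rows -> class indices
    loss = F.cross_entropy(logits, target_indices)  # applies log-softmax + NLL internally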

Hey, thanks, but it did not change anything.
My code now looks like this:

import torch
import torchvision
from torchvision import transforms
from PIL import Image
from os import listdir
import random
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import os
import sys

normalize = transforms.Normalize(
    mean=[0.485 , 0.456 , 0.406],
    std=[0.229, 0.224, 0.225]
)

transform = transforms.Compose([
    transforms.Resize(512),
    transforms.CenterCrop(512),
    transforms.ToTensor(),
    normalize])

trainingdatadirectory = 'Testdata'
testdatadirectory = 'Trainingdata'
train_data_list = []
train_data= []
target_list = []
files = listdir(trainingdatadirectory)


learning = False
loadNN = True
learningrate = 0.00001
epochs = 50

if (learning == True):
	for i in range(len(listdir(trainingdatadirectory))): #insert trainingfile-directory
		f = random.choice(files)
		#print("\r"+f, end="")
		files.remove(f)
		img = Image.open(trainingdatadirectory + "/" + f) #insert trainingfile-directory
		img_tensor = transform(img)
		train_data_list.append(img_tensor)
		isnear = 1 if 'near' in f else 0
		ismiddle = 1 if 'middle' in f else 0
		iswide = 1 if 'long' in f else 0
		isnothing = 1 if 'none' in f else 0
		target = (isnear,ismiddle,iswide, isnothing)        #redo the target definition
		#print("\r " + str(target), end="")
		target_list.append(target)
		if len(train_data_list) >= 64:
			#print(torch.stack(train_data_list).size())
			train_data.append((torch.stack(train_data_list), target_list))
			train_data_list = []
			target_list = []
			print("Loaded: " , len(train_data), ' of ', int(len(listdir(trainingdatadirectory))) / 64)



class Netz(nn.Module):
    def __init__(self):
        super(Netz, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        self.conv3 = nn.Conv2d(12, 18, kernel_size=5)
        self.conv4 = nn.Conv2d(18, 24, kernel_size=5)
        self.fc1 = nn.Linear(18816, 12500)
        self.fc2 = nn.Linear(12500, 1000)
        self.fc3 = nn.Linear(1000, 4)
        self.conv1_bn = nn.BatchNorm2d(6)
        self.conv2_bn = nn.BatchNorm2d(12)
        self.conv3_bn = nn.BatchNorm2d(18)
        self.conv4_bn = nn.BatchNorm2d(24)
    def forward(self, x):
        x = self.conv1(x)
        x = F.max_pool2d(x, 2)
        x = self.conv1_bn(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.max_pool2d(x, 2)
        x = self.conv2_bn(x)
        x = F.relu(x)
        x = self.conv3(x)
        x = F.max_pool2d(x, 2)
        x = self.conv3_bn(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.max_pool2d(x, 2)
        x = self.conv4_bn(x)
        x = F.relu(x)
        x = x.view(-1, 18816)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        logit = torch.sigmoid(x)
        proba = F.softmax(logit, dim=1)
        return proba, logit

model = Netz()
model.cuda()

#optimizer = optim.SGD(model.parameters(), lr=learningrate)
optimizer = optim.Adam(model.parameters(), lr=learningrate)

def train(epoch):
	model.train()
	batch_id = 0
	for data, target in train_data:
		target = torch.FloatTensor(target)
		data = Variable(data)
		data = data.cuda()
		target = Variable(target)
		target = target.cuda()
		optimizer.zero_grad()
		proba, logit = model(data)
		criterion = F.binary_cross_entropy
		loss = criterion(proba, target)
		loss.backward()
		optimizer.step()
		batch_id = batch_id + 1
		print('Train Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_id * len(data) / 64, len(train_data), 100. * batch_id / len(train_data),  loss.item()))

def test():
	model.eval()
	files = listdir(testdatadirectory)
	f = random.choice(files)
	print(f)
	img = Image.open(testdatadirectory + '/' + f)
	img_eval_Tensor = transform(img)
	img_eval_Tensor.unsqueeze_(0)
	print(img_eval_Tensor.size())
	data = Variable(img_eval_Tensor.cuda())
	proba, logit = model(data)
	print(str(proba.data.max(1, keepdim=True)[1]) + ", " + str(proba.data.max(1, keepdim=True)[0]))
	#img.show()
	#print(str(out))
	x = input('')

if(learning == True):
	if loadNN == True:
		model = torch.load('net.pt')
	for epoch in range(1,epochs):
		train(epoch)
		torch.save(model, 'net.pt')

if(learning == False):
    model = torch.load('net.pt')

while 1:
    test()

But both of them output 2, like it was shown in the screenshot.

Did I include your idea correctly?

    x = self.fc3(x)
    logit = torch.sigmoid(x)
    proba = F.softmax(logit, dim=1)
    return proba, logit

You would still have the same issue. The logits are the outputs of the last layer before the non-linear activation function is applied; in other words, they are the net inputs of the output layer. So you would do it as follows:

    logits = self.fc3(x)
    probas = F.softmax(logits, dim=1)
    return logits, probas

The reason is that F.cross_entropy applies the log-softmax internally (for numerical stability reasons), so it expects the raw logits as inputs.

An alternative would be using the negative log likelihood loss on the log-softmax outputs. Or in other words, in PyTorch, F.cross_entropy does log(softmax) + negative log likelihood internally.
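
To make that concrete, here is a quick sketch with made-up tensors showing that the two give the same result (shapes and values are arbitrary):

    import torch
    import torch.nn.functional as F

    logits = torch.randn(8, 4)                # fake batch of 4-class logits
    y = torch.randint(0, 4, (8,))             # fake class-index targets
    loss_a = F.cross_entropy(logits, y)
    loss_b = F.nll_loss(F.log_softmax(logits, dim=1), y)
    print(torch.allclose(loss_a, loss_b))     # True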

This naming convention is actually one of the confusing parts of PyTorch. For your reference, there is:

  • F.binary_cross_entropy that takes sigmoid outputs as inputs
  • F.binary_cross_entropy_with_logits that takes logits as inputs
  • F.cross_entropy that takes logits as inputs
  • F.nll_loss that takes log-softmax outputs as inputs

In my opinion, F.cross_entropy should have been named F.cross_entropy_with_logits, similar to the binary cross entropy variant, for consistency. (Since this is something that often trips up beginners, it's maybe worth thinking about the naming convention for future versions of PyTorch @smth)
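
The same relationship holds for the binary variants in the list above; continuing the snippet from before, a small sketch (again with made-up tensors):

    z = torch.randn(8, 4)                     # fake logits
    t = torch.empty(8, 4).random_(2)          # fake 0/1 float targets
    loss_a = F.binary_cross_entropy_with_logits(z, t)
    loss_b = F.binary_cross_entropy(torch.sigmoid(z), t)
    print(torch.allclose(loss_a, loss_b))     # True, up to floating point error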


Thanks again,
but it also did not change anything. I also looked at the output of the NN while it was training; there the output changed and it looked like it was working. But when I then test the NN, it just displays 2 again.
Could it be something in the test function?

Here again my Code:

import torch
import torchvision
from torchvision import transforms
from PIL import Image
from os import listdir
import random
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import os
import sys

normalize = transforms.Normalize(
    mean=[0.485 , 0.456 , 0.406],
    std=[0.229, 0.224, 0.225]
)

transform = transforms.Compose([
    transforms.Resize(512),
    transforms.CenterCrop(512),
    transforms.ToTensor(),
    normalize])

trainingdatadirectory = 'Testdata'
testdatadirectory = 'Trainingdata'
train_data_list = []
train_data= []
target_list = []
files = listdir(trainingdatadirectory)


learning = True
loadNN = False
learningrate = 0.00001
epochs = 30

if (learning == True):
	for i in range(len(listdir(trainingdatadirectory))): #insert trainingfile-directory
		f = random.choice(files)
		#print("\r"+f, end="")
		files.remove(f)
		img = Image.open(trainingdatadirectory + "/" + f) #insert trainingfile-directory
		img_tensor = transform(img)
		train_data_list.append(img_tensor)
		isnear = 1 if 'near' in f else 0
		ismiddle = 1 if 'middle' in f else 0
		iswide = 1 if 'long' in f else 0
		isnothing = 1 if 'none' in f else 0
		target = (isnear,ismiddle,iswide, isnothing)        #redo the target definition
		#print("\r " + str(target), end="")
		target_list.append(target)
		if len(train_data_list) >= 64:
			#print(torch.stack(train_data_list).size())
			train_data.append((torch.stack(train_data_list), target_list))
			train_data_list = []
			target_list = []
			print("Loaded: " , len(train_data), ' of ', int(len(listdir(trainingdatadirectory))) / 64)



class Netz(nn.Module):
    def __init__(self):
        super(Netz, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        self.conv3 = nn.Conv2d(12, 18, kernel_size=5)
        self.conv4 = nn.Conv2d(18, 24, kernel_size=5)
        self.fc1 = nn.Linear(18816, 12500)
        self.fc2 = nn.Linear(12500, 1000)
        self.fc3 = nn.Linear(1000, 4)
        self.conv1_bn = nn.BatchNorm2d(6)
        self.conv2_bn = nn.BatchNorm2d(12)
        self.conv3_bn = nn.BatchNorm2d(18)
        self.conv4_bn = nn.BatchNorm2d(24)
    def forward(self, x):
        x = self.conv1(x)
        x = F.max_pool2d(x, 2)
        x = self.conv1_bn(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.max_pool2d(x, 2)
        x = self.conv2_bn(x)
        x = F.relu(x)
        x = self.conv3(x)
        x = F.max_pool2d(x, 2)
        x = self.conv3_bn(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.max_pool2d(x, 2)
        x = self.conv4_bn(x)
        x = F.relu(x)
        x = x.view(-1, 18816)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        logits = self.fc3(x)
        proba = F.softmax(logits, dim=1)
        return proba, logits

model = Netz()
model.cuda()

#optimizer = optim.SGD(model.parameters(), lr=learningrate)
optimizer = optim.Adam(model.parameters(), lr=learningrate)

def train(epoch):
	model.train()
	batch_id = 0
	for data, target in train_data:
		target = torch.FloatTensor(target)
		data = Variable(data)
		data = data.cuda()
		target = Variable(target)
		target = target.cuda()
		optimizer.zero_grad()
		proba, logit = model(data)
		criterion = F.binary_cross_entropy
		output = str(proba.data.max(1, keepdim=True)[1])
		loss = criterion(proba, target)
		loss.backward()
		optimizer.step()
		batch_id = batch_id + 1
		print('\r Train Epoch : '+str(epoch)+' [' + str(batch_id * len(data) / 64) + '/' + str(len(train_data)) + ' ('+ str(100. * batch_id / len(train_data)) +'%)]\tLoss:'+str(loss.item()), end="")

def test():
	model.eval()
	files = listdir(testdatadirectory)
	f = random.choice(files)
	print(f)
	img = Image.open(testdatadirectory + '/' + f)
	img_eval_Tensor = transform(img)
	img_eval_Tensor.unsqueeze_(0)
	print(img_eval_Tensor.size())
	data = Variable(img_eval_Tensor.cuda())
	proba, logit = model(data)
	print(proba.data.max(1, keepdim=True))
	#img.show()
	#print(str(out))
	x = input('')

if(learning == True):
	if loadNN == True:
		model = torch.load('net.pt')
	for epoch in range(1,epochs):
		train(epoch)
		torch.save(model, 'net.pt')

if(learning == False):
    model = torch.load('net.pt')

while 1:
    test()

Could it be something in the test function?

Hm, yeah, could be. Sorry, but your code is a bit convoluted and hard to follow/read :stuck_out_tongue: . Or it could also be some issue with the dataset. Can you check the training accuracy and the test accuracy? Before debugging the test function, maybe there's already something weird going on with the training that needs to be fixed first.
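
For the training accuracy, a rough sketch like this could work, reusing your pre-built train_data batches and the (proba, logits) forward from your code (the function name and details are just placeholders):

    def training_accuracy():
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():                          # no gradients needed for evaluation
            for data, target in train_data:
                target = torch.FloatTensor(target).cuda()
                proba, logits = model(data.cuda())
                pred = proba.argmax(dim=1)             # predicted class per image
                correct += (pred == target.argmax(dim=1)).sum().item()
                total += target.size(0)
        model.train()
        return correct / total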

I have a loss of 0.27 at the end.
I can not give you a test accuracy, because regardless of the data it just always displays that neuron 2 is the highest one.