Loss only updates the first time

Hello everyone, I’ve been training on my custom data recently, but the loss only updates the first time, and I really don’t know where the problem is.

Here is the code:

for epoch in range(cfg['epoch']):
	for phase in ['train', 'valid']:
		if phase == 'train':
			net.train()
		else:
			net.eval()

		running_loss, running_acc = 0.0, 0.0

		for i, (point, ans) in enumerate(all_loader[phase]):
			if cfg['use_cuda'] and torch.cuda.is_available():
				point = point.cuda()
				ans = ans.cuda()

			optimizer.zero_grad()
			with torch.set_grad_enabled(phase == 'train'):
				out = net(point.float())
				_, predicted = torch.max(out, 1)
				loss = criterion(out, ans.long())

				if phase == 'train':
					loss.backward()
					optimizer.step()

			running_loss += loss.item()
			running_acc += (predicted == ans.long()).sum().item()

The result is:

train, Epochs [1/5], Loss: 0.2585
valid, Epochs [1/5], Loss: 0.2465
==================================================
train, Epochs [2/5], Loss: 0.2573
valid, Epochs [2/5], Loss: 0.2465
==================================================
train, Epochs [3/5], Loss: 0.2573
valid, Epochs [3/5], Loss: 0.2465
==================================================
train, Epochs [4/5], Loss: 0.2573
valid, Epochs [4/5], Loss: 0.2465
==================================================
train, Epochs [5/5], Loss: 0.2573
valid, Epochs [5/5], Loss: 0.2465
==================================================

Are you reinitializing the model somewhere?
Could you post the whole training code so that we could have a look?

This is the whole training code

import torch
import torch.nn as nn
import torch.nn.functional as fuc
import torch.optim as optim

net = Conv1DNet().float()

if cfg['use_cuda'] and torch.cuda.is_available():
	net = net.cuda()

history = train_evaluate(net, all_loader, cfg)

def train_evaluate(net, all_loader, cfg):
	criterion = nn.CrossEntropyLoss()
	optimizer = optim.RMSprop(net.parameters(), lr = cfg['learning_rate'])

	history = {
		'train_loss': [],
		'train_acc': [],
		'valid_loss': [],
		'valid_acc': []
	}

	for epoch in range(cfg['epoch']):
		for phase in ['train', 'valid']:
			if phase == 'train':
				net.train()
			else:
				net.eval()

			running_loss, running_acc = 0.0, 0.0

			for i, (point, ans) in enumerate(all_loader[phase]):
				if cfg['use_cuda'] and torch.cuda.is_available():
					point = point.cuda()
					ans = ans.cuda()

				optimizer.zero_grad()
				with torch.set_grad_enabled(phase == 'train'):
					out = net(point.float())
					_, predicted = torch.max(out, 1)
					loss = criterion(out, ans.long())

					if phase == 'train':
						loss.backward()
						optimizer.step()

				running_loss += loss.item()
				running_acc += (predicted == ans.long()).sum().item()

			history[phase + '_loss'].append(running_loss / cfg[phase + 'set_size'])
			history[phase + '_acc'].append(running_acc / cfg[phase + 'set_size'])

			print ("{}, Epochs [{}/{}], Loss: {:.4f}".format(phase, epoch + 1, cfg['epoch'], history[phase + '_loss'][-1]))

		print ("==================================================")

	print ("Average accurancy of the Net is {:.2f}%".format(sum(history['valid_acc']) / cfg['epoch'] * 100))
	return (history)

and the net is

class Conv1DNet(nn.Module):
	def __init__(self):
		super(Conv1DNet, self).__init__()

		self.fcc = nn.Linear(2, 128)
		self.conv1 = nn.Conv1d(18, 128, 1)
		self.pool = nn.MaxPool1d(2)
		self.conv2 = nn.Conv1d(128, 32, 1)

		self.fc1 = nn.Linear(32 * 32, 32)
		self.fc2 = nn.Linear(32, 16)
		self.fc3 = nn.Linear(16, 3)

	def forward(self, x):
		x = self.fcc(x)
		x = self.pool(fuc.relu(self.conv1(x)))
		x = self.pool(fuc.relu(self.conv2(x)))
		x = x.view(-1, 32 * 32)
		x = fuc.relu(self.fc1(x))
		x = fuc.relu(self.fc2(x))
		x = fuc.softmax(self.fc3(x), dim = 1)

		return (x)

and the cfg is the dict:

cfg = {'batch_size': 4, 
          'num_workers': 2, 
          'learning_rate': 0.01, 
          'epoch': 5, 
          'use_cuda': True, 
          'trainset_size': 272, 
          'validset_size': 91, 
          'train_num': 68}

Thanks for the code!
Could you remove the last softmax call from your model?
nn.CrossEntropyLoss expects logits as the model’s output, since internally F.log_softmax and nn.NLLLoss will be used.
Your model should thus just return self.fc3(x) without any non-linearity.
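
To make this concrete, here is a small self-contained check (using made-up random tensors rather than your actual model output) that nn.CrossEntropyLoss on raw logits gives the same value as F.log_softmax followed by nn.NLLLoss:

import torch
import torch.nn as nn
import torch.nn.functional as F

torch.manual_seed(0)
logits = torch.randn(4, 3)            # e.g. a batch of 4 samples, 3 classes
target = torch.tensor([0, 2, 1, 2])   # class indices

ce  = nn.CrossEntropyLoss()(logits, target)
nll = nn.NLLLoss()(F.log_softmax(logits, dim=1), target)
print(torch.allclose(ce, nll))        # True

# Since the loss already applies log_softmax internally, the last line of
# forward should simply be:
#     return self.fc3(x)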

Oh thanks, it works!
Thanks for the reply and the clear explanation!