Hi, I am running a CNN in PyTorch with the cross-entropy loss. I was wondering whether I should define the softmax inside the neural network itself, or whether it is fine to feed the raw outputs to the loss and then apply softmax separately to get the predicted values for the accuracy calculation. What would be the difference? Thank you.
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Convolutional feature extractor (custom layers defined elsewhere)
        self.conv1 = ConvLayer()
        self.conv2 = ConvLayer2()
        self.conv3 = ConvLayer3()
        # Classifier head
        self.fc0 = nn.Linear(96 * 12 * 1, 500)
        self.dr = nn.Dropout(0.2)
        self.fc1 = nn.Linear(500, 12)
        self.act = nn.ReLU()

    # x represents our data
    def forward(self, x):
        # Pass data through the convolutional layers
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, features)
        x = self.fc0(x)
        x = self.act(x)
        x = self.dr(x)
        x = self.fc1(x)
        return x  # raw logits; no softmax here
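For context, `nn.CrossEntropyLoss` applies log-softmax internally, which is why the model above returns raw logits. A minimal sketch of the two equivalent formulations (the shapes and random tensors below are illustrative only):

import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 12)           # illustrative batch of 4, 12 classes
target = torch.randint(0, 12, (4,))   # illustrative class indices

# Option 1: raw logits fed to CrossEntropyLoss (no softmax in the model)
loss_a = nn.CrossEntropyLoss()(logits, target)

# Option 2: log-softmax in the model, paired with NLLLoss
loss_b = nn.NLLLoss()(F.log_softmax(logits, dim=1), target)

print(torch.allclose(loss_a, loss_b))  # True: the two losses are equivalent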
def train(model, optimizer, train_loader, epoch, writer, IMU):
    # criterion and args are assumed to be defined at module level
    model.train()
    train_loss = 0
    correct = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        data = data[:, None, :]  # add a channel dimension
        target = torch.squeeze(target).long()  # CrossEntropyLoss expects class indices
        output = model(data)  # raw logits from the model
        loss = criterion(output, target)
        # softmax is only needed for probabilities; the argmax of the
        # logits already gives the predicted class
        predicted_softmax = F.softmax(output, dim=1)
        _, predicted = torch.max(predicted_softmax, 1)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        correct += (predicted == target).sum().item()
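Since softmax is monotonic, the argmax over the raw logits gives the same predictions as the argmax over the softmax probabilities, so the extra softmax call is optional for the accuracy count. A quick illustrative check (tensor shapes are placeholders):

import torch
import torch.nn.functional as F

logits = torch.randn(4, 12)  # illustrative batch of logits

pred_from_logits = logits.argmax(dim=1)
pred_from_probs = F.softmax(logits, dim=1).argmax(dim=1)

print(torch.equal(pred_from_logits, pred_from_probs))  # True: softmax preserves argmax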