Cross Entropy Loss Dimensions

FahadAslamCS · April 18, 2020, 3:01pm

I am facing a problem with my own data dimensions. I am completely new to PyTorch I will be grateful if one can help me here.
I am getting error of “Target (10)(this no changes between my class name) is out of bounds”

I have 6 classes denoted by 0, 5,20,40, 2.5, 10
I do not know how to handle it.

np_target = pd.read_csv('naphData.csv').astype('int')
np_data = pd.read_csv('naphTarget.csv')
X_train, X_test, y_train, y_test = train_test_split(np_data, np_target, test_size=0.2)

train_dataset = torch.tensor(X_train.values)
train_dataset = train_dataset.float()
train_target = torch.tensor(y_train.values)

test_dataset = torch.tensor(X_test.values)
test_dataset = test_dataset.float()
test_target = torch.tensor(y_test.values)

# Define a Neural Network class

class Net(nn.Module):
    
    # Constructor
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)

    # Prediction    
    def forward(self, x):
        x = torch.sigmoid(self.linear1(x))  
        x = self.linear2(x)
        return x

train_data = []
for i in range(len(train_dataset)):
   train_data.append([train_dataset[i], train_target[i]])

trainloader = torch.utils.data.DataLoader(train_data, shuffle=True, batch_size=10)

i1, l1 = next(iter(trainloader))
print(i1.shape)

test_data = []
for i in range(len(test_dataset)):
   test_data.append([test_dataset[i], test_target[i]])

validationloader = torch.utils.data.DataLoader(test_data, shuffle=True, batch_size=10)

i1, l1 = next(iter(validationloader))
print(i1.shape)

# Create the model with 100 neurons

input_dim = 1 * 160  #160 features in each sample
hidden_dim = 100
output_dim = 6  #number of class

model = Net(input_dim, hidden_dim, output_dim)

# Create criterion function
criterion = nn.CrossEntropyLoss()

def train(model, criterion, train_loader, validation_loader, optimizer, epochs):
    i = 0
    useful_stuff = {'training_loss': [],'validation_accuracy': []}  
    for epoch in range(epochs):
        for i, (x, y) in enumerate(train_loader) : 
            optimizer.zero_grad()
            z = model(x.view(-1, 1 * 160))
            y = y.squeeze(1)
          
            loss = criterion(z, y) #getting error here
            loss.backward()
            optimizer.step()
             #loss for every iteration
            useful_stuff['training_loss'].append(loss.data.item())
        correct = 0
        for x, y in validation_loader:
            #validation 
            z = model(x.view(-1, 1 * 160))
            _, label = torch.max(z, 1)
            correct += (label == y).sum().item()
        accuracy = 100 * (correct / len(validation_dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Train the model

training_results = train(model, criterion, trainloader, validationloader, optimizer, epochs=100)

KFrank · April 18, 2020, 7:50pm

Hello Fahad!

When you use CrossEntropyLoss, your target y that you pass
in to criterion must be integer class labels that take on values
running from 0 to nClass - 1 (in your case, (0, 1, 2, 3, 4, 5)).

If your class labels are originally (0, 5, 20, 40, 2.5, 10), you will
have to remap them before passing them to criterion.

Just to be clear, the shape of y should simply be [nBatch].

Best.

K. Frank