Multi-target not supported at /Users/soumith/mc3build/conda-bld/pytorch_1549593514549/work/aten/src/THNN/generic/ClassNLLCriterion.c:21

I’m working in a classification using PyTorch. My dataset contains 32.k*15 data, each data with 15 features, some feature represent with race or nation, can’t be represented as a number, so I use the one-hot encoding to preprocess it.

After building my network, I try to use CrossEntropyLoss() as my loss function, I did some research on it, but it doesn’t work. Interesting things is if I change CrossEntropyLoss() to MSELoss(), it’s work…

I think the loss function must be sat with the corresponding problem, but I have no idea how to solve such problem.

Here is my code:

[quote="a001932, post:1, topic:44663, full:true"]
I'm working in a classification using PyTorch. My dataset contains 32.k*15 data, each data with 15 features, some feature represent with race or nation, can't be represented as a number, so I use the one-hot encoding  to preprocess it.

After building my network, I try to use CrossEntropyLoss() as my loss function, I did some research on it, but it doesn't work. Interesting things is if I change CrossEntropyLoss() to MSELoss(), it's work....

I think the loss function must be sat with the corresponding problem, but I have no idea how to solve such problem.


Here is my code:
[/quote]


# To predict someone's income >50K or not

#-------------read data & One-hot-encoding & Normalization -------------------- 
def read_file(train,test):
    df = pd.DataFrame(pd.read_csv(train))
    train_size = df.shape[0]
    df1 = pd.read_csv(test)
    df.fillna(0, inplace=True)
    df1.fillna(0, inplace=True)
    # one hot encoding
    df1 = pd.concat([df1,df.iloc[:df1.shape[0],df.shape[1]-1]],axis = 1)
    
    test_size = df1.shape[0]
    df = pd.concat([df,df1],axis = 0)
    df = pd.DataFrame(df)
    df.fillna(0, inplace=True)
    
    for i in range(df.shape[1]-1):
        df.iloc[:,i].replace('?', df.iloc[:,0].value_counts().idxmax())
    counter = 0  # --> for accumulating the index
    c = 0;
    for i in range(df.shape[1]):
        if(not is_number(df.iloc[1,i+counter])):
            # one-hot-encoding --> insert the old place
            dff = pd.get_dummies(df[df.columns.values[i+counter]])
            dff = pd.DataFrame(dff)
            dfhead = df.iloc[:,:i+counter]
            dftail = df.iloc[:,i+counter+1:df.shape[1]]
            counter = counter + dff.shape[1]-1 
            df = pd.concat([dfhead,dff,dftail],axis = 1)
            c = c + 1;
            
    # Normalization
        else:
            mean = np.mean(df.iloc[:,i+counter], axis = 0)
            std = np.std(df.iloc[:,i+counter], axis = 0)
            if (std != 0):
                df.iloc[:,i+counter] =  (df.iloc[:,i+counter]-mean)/std
    
    # Split data to test & training data           
    df_test = df.iloc[train_size:,:df.shape[1]-2]
    df =  df.iloc[:train_size,:]
    
    # Build training data    
    # last two col <=50K or not ,>50K or not 
    # -> use X_train.shape[1]-1 & X_train.shape[1]-2
    X_train = df.iloc[:,:df.shape[1]-2]
    Y_train = pd.DataFrame(df.iloc[:,df.shape[1]-1])
    Y_train = pd.concat([Y_train,pd.DataFrame(df.iloc[:,df.shape[1]-2])],axis = 1)
    
    return X_train, Y_train, df_test
#------------------------------------------------------------------------------

# Model construction
class Network(nn.Module):
    def __init__(self, shape1, shape2):
        super(Network,self).__init__()
        self.log = nn.Sequential(
        # k dimension input and output just the class name --> one dimension
        # input_feature, output_feature(number of class)
        nn.Linear(shape1 ,75),
        nn.Linear(75, 50),
        nn.Linear(50,25),
        nn.Linear(25, shape2)
        )
    
    # x for input training data
    def forward(self,x):
        y_pred = torch.sigmoid(self.log(x)).float()
        return y_pred
#------------------------------------------------------------------------------
        


#-----------------------------code working here-------------------------------- 
# Define the specific dataset 
class DataSet(Dataset):
    def __init__(self):
        X_train, Y_train, df_test = read_file('train.csv','test.csv')
        
        self.len = X_train.shape[0]
        self.x_data = torch.from_numpy(X_train.values)
        self.y_data = torch.from_numpy(Y_train.values)
        
    def __getitem__(self,index):
        return self.x_data[index,:], self.y_data[index,:]
    
    def __len__(self):
        return self.len


train_set = DataSet()
batch_size = 500
# to build the network with right input size
xxx, yy = train_set.__getitem__(5) 
train_loader = DataLoader(train_set, batch_size = batch_size, shuffle = True, num_workers = 4)
model = Network(xxx.shape[0],2)

optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)
loss_log = nn.CrossEntropyLoss()
#nn.MSELoss()
#nn.CrossEntropyLoss()

# Training
model.train()
accuracy = 0

for epoch in range(10):
    for i, data in enumerate(train_loader):
        accuracy = 0
        input, label = data

        input = torch.tensor(input, dtype = torch.float)
        label = torch.tensor(label, dtype = torch.long)

        y_pred = F.softmax(model(input))
        loss = loss_log(y_pred, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
        output = torch.max(F.softmax(y_pred),1)[1]        
        pred_y = (output.data.numpy()).reshape((output.data.numpy()).shape[0],1)
        target_y = (label[:,0].data.numpy()).reshape((label.data.numpy()).shape[0],1)
        if (i % 100 == 0):
            accuracy = float((pred_y == target_y).sum()) / float(target_y.size)
            print('epoch :',epoch+1, 'accuracy :', accuracy)