I’m working in a classification using PyTorch. My dataset contains 32.k*15 data, each data with 15 features, some feature represent with race or nation, can’t be represented as a number, so I use the one-hot encoding to preprocess it.
After building my network, I try to use CrossEntropyLoss() as my loss function, I did some research on it, but it doesn’t work. Interesting things is if I change CrossEntropyLoss() to MSELoss(), it’s work…
I think the loss function must be sat with the corresponding problem, but I have no idea how to solve such problem.
Here is my code:
[quote="a001932, post:1, topic:44663, full:true"]
I'm working in a classification using PyTorch. My dataset contains 32.k*15 data, each data with 15 features, some feature represent with race or nation, can't be represented as a number, so I use the one-hot encoding to preprocess it.
After building my network, I try to use CrossEntropyLoss() as my loss function, I did some research on it, but it doesn't work. Interesting things is if I change CrossEntropyLoss() to MSELoss(), it's work....
I think the loss function must be sat with the corresponding problem, but I have no idea how to solve such problem.
Here is my code:
[/quote]
# To predict someone's income >50K or not
#-------------read data & One-hot-encoding & Normalization --------------------
def read_file(train,test):
df = pd.DataFrame(pd.read_csv(train))
train_size = df.shape[0]
df1 = pd.read_csv(test)
df.fillna(0, inplace=True)
df1.fillna(0, inplace=True)
# one hot encoding
df1 = pd.concat([df1,df.iloc[:df1.shape[0],df.shape[1]-1]],axis = 1)
test_size = df1.shape[0]
df = pd.concat([df,df1],axis = 0)
df = pd.DataFrame(df)
df.fillna(0, inplace=True)
for i in range(df.shape[1]-1):
df.iloc[:,i].replace('?', df.iloc[:,0].value_counts().idxmax())
counter = 0 # --> for accumulating the index
c = 0;
for i in range(df.shape[1]):
if(not is_number(df.iloc[1,i+counter])):
# one-hot-encoding --> insert the old place
dff = pd.get_dummies(df[df.columns.values[i+counter]])
dff = pd.DataFrame(dff)
dfhead = df.iloc[:,:i+counter]
dftail = df.iloc[:,i+counter+1:df.shape[1]]
counter = counter + dff.shape[1]-1
df = pd.concat([dfhead,dff,dftail],axis = 1)
c = c + 1;
# Normalization
else:
mean = np.mean(df.iloc[:,i+counter], axis = 0)
std = np.std(df.iloc[:,i+counter], axis = 0)
if (std != 0):
df.iloc[:,i+counter] = (df.iloc[:,i+counter]-mean)/std
# Split data to test & training data
df_test = df.iloc[train_size:,:df.shape[1]-2]
df = df.iloc[:train_size,:]
# Build training data
# last two col <=50K or not ,>50K or not
# -> use X_train.shape[1]-1 & X_train.shape[1]-2
X_train = df.iloc[:,:df.shape[1]-2]
Y_train = pd.DataFrame(df.iloc[:,df.shape[1]-1])
Y_train = pd.concat([Y_train,pd.DataFrame(df.iloc[:,df.shape[1]-2])],axis = 1)
return X_train, Y_train, df_test
#------------------------------------------------------------------------------
# Model construction
class Network(nn.Module):
def __init__(self, shape1, shape2):
super(Network,self).__init__()
self.log = nn.Sequential(
# k dimension input and output just the class name --> one dimension
# input_feature, output_feature(number of class)
nn.Linear(shape1 ,75),
nn.Linear(75, 50),
nn.Linear(50,25),
nn.Linear(25, shape2)
)
# x for input training data
def forward(self,x):
y_pred = torch.sigmoid(self.log(x)).float()
return y_pred
#------------------------------------------------------------------------------
#-----------------------------code working here--------------------------------
# Define the specific dataset
class DataSet(Dataset):
def __init__(self):
X_train, Y_train, df_test = read_file('train.csv','test.csv')
self.len = X_train.shape[0]
self.x_data = torch.from_numpy(X_train.values)
self.y_data = torch.from_numpy(Y_train.values)
def __getitem__(self,index):
return self.x_data[index,:], self.y_data[index,:]
def __len__(self):
return self.len
train_set = DataSet()
batch_size = 500
# to build the network with right input size
xxx, yy = train_set.__getitem__(5)
train_loader = DataLoader(train_set, batch_size = batch_size, shuffle = True, num_workers = 4)
model = Network(xxx.shape[0],2)
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)
loss_log = nn.CrossEntropyLoss()
#nn.MSELoss()
#nn.CrossEntropyLoss()
# Training
model.train()
accuracy = 0
for epoch in range(10):
for i, data in enumerate(train_loader):
accuracy = 0
input, label = data
input = torch.tensor(input, dtype = torch.float)
label = torch.tensor(label, dtype = torch.long)
y_pred = F.softmax(model(input))
loss = loss_log(y_pred, label)
optimizer.zero_grad()
loss.backward()
optimizer.step()
output = torch.max(F.softmax(y_pred),1)[1]
pred_y = (output.data.numpy()).reshape((output.data.numpy()).shape[0],1)
target_y = (label[:,0].data.numpy()).reshape((label.data.numpy()).shape[0],1)
if (i % 100 == 0):
accuracy = float((pred_y == target_y).sum()) / float(target_y.size)
print('epoch :',epoch+1, 'accuracy :', accuracy)