I want to implement a multi-class classifier for categorizing sentences into class labels 0, 1 and 2 using PyTorch. The input to the neural network will be an average of the word embeddings (vectors of 300 dimensions) of all the words that form a sentence. Hence the input size is 300.
This is the sklearn code for the same which I found here: https://github.com/mdvu15/CS488-Senior-Capstone/blob/master/classifierTrain.py
(typing out the code snippet)
X_train, X_test, y_train, y_test = train_test_split(V, y, test_size = 0.25) #25% of data set aside for testing
mlp = MLPClassifier(hidden_layer_sizes=(500, 20, 20, 20), max_iter=1000, batch_size=32,
warm_start=True, early_stopping= True) #Classifier object
mlp.fit(X_train, y_train)
This is the PyTorch code I have so far for this-
class Linear_Model(torch.nn.Module):
    """MLP classifier for 300-dim averaged word embeddings -> 3 class logits.

    Mirrors sklearn's MLPClassifier(hidden_layer_sizes=(500, 20, 20, 20)).
    Returns raw logits; pair with torch.nn.CrossEntropyLoss, which applies
    log-softmax internally.
    """

    def __init__(self):  # was `init` -- PyTorch requires __init__
        super().__init__()  # was `super().init()`
        # Layers are created on CPU; move the whole module once with
        # net.to(self.device) after construction instead of per-layer
        # .to(device) calls (the original referenced an undefined `device`).
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.fc1 = torch.nn.Linear(300, 500)
        self.fc2 = torch.nn.Linear(500, 20)
        self.fc3 = torch.nn.Linear(20, 20)
        self.fc4 = torch.nn.Linear(20, 20)
        self.fc5 = torch.nn.Linear(20, 3)
        self.relu = torch.nn.ReLU()

    def forward(self, input, flag=0):
        """Forward pass.

        Args:
            input: float tensor of shape (batch, 300).
            flag: unused; kept (with a default) for caller compatibility.

        Returns:
            Raw class logits of shape (batch, 3). NOTE: no Softmax here --
            CrossEntropyLoss expects logits, and applying Softmax before it
            (double softmax) flattens gradients and stalls learning. Apply
            torch.softmax(logits, dim=1) only at inference if you need
            probabilities.
        """
        fc_out = self.relu(self.fc1(input))
        fc_out = self.relu(self.fc2(fc_out))
        fc_out = self.relu(self.fc3(fc_out))
        fc_out = self.relu(self.fc4(fc_out))
        return self.fc5(fc_out)
# Build the model and move it to the GPU if one is available -- the training
# loop moves inputs/labels to `device`, so the model must live there too,
# otherwise a device-mismatch RuntimeError is raised on the first forward pass.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = Linear_Model().to(device)
# CrossEntropyLoss = LogSoftmax + NLLLoss: it must receive raw logits
# (so the network's forward must NOT end in Softmax).
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
# Training: one Adam step per mini-batch.
for epoch in range(n_epochs):
    train_accuracy = []  # per-batch accuracy for this epoch
    train_losses = []    # per-batch loss for this epoch
    net.train()
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # clear grads BEFORE backward (conventional order)
        output = net(inputs.float(), 0)
        # No .squeeze(): squeezing a size-1 final batch would drop the batch
        # dimension and break both the loss and argmax(dim=1) below.
        # CrossEntropyLoss wants logits (batch, C) and int64 labels (batch,).
        loss = criterion(output, labels)
        train_losses.append(loss.item())
        loss.backward()
        optimizer.step()

        # Batch accuracy, computed directly in torch (no numpy round-trip).
        # Divide by labels.size(0), not batch_size: the last batch from the
        # loader may be smaller, which would inflate/deflate the metric.
        pred = torch.argmax(output, dim=1)
        num_correct = (pred == labels).sum().item()
        train_accuracy.append(num_correct / labels.size(0))
Is this code correct? If not, then can the corrections (also suggestions if any) be pointed out? Thanks!