Ignore the commented out code.
I created a simple classification model as follows
The classes are 0 and 1 (binary classification)
This model gives a very poor result 58% accuracy whilst the same binaryFeats from vectorToTrain.npy when trained using MLP classifier of scikit-learn gives a very good result with 90% accuracy. I have checked changing every parameter. The weights and loss are also changed with each epoch but results are never getting more than 58.xx%.
Can someone please help me in finding the error Iām making in this code?
torch.manual_seed(123)
class pathLSTMTagger(nn.Module):
def __init__(self, embedding_dim, hidden_dim, binary_dim, comb_dim, tagset_size):
super(pathLSTMTagger, self).__init__()
self.hidden_dim = hidden_dim
self.binary_dim = binary_dim
self.embedding_dim = embedding_dim
self.comb_dim = comb_dim
# The LSTM takes word embeddings as inputs, and outputs hidden states
# with dimensionality hidden_dim.
self.lstm = nn.LSTM(embedding_dim, hidden_dim)
self.hidden = self.init_hidden()
# The linear layer that maps from hidden state space to tag space
self.hidden2Path = nn.Linear(hidden_dim, comb_dim)
self.binary2Path = nn.Linear(binary_dim, comb_dim)
self.comb2tag = nn.Linear(2*comb_dim, tagset_size)
self.binary2L = nn.Linear(binary_dim, 100)
self.L2Tag = nn.Linear(100, tagset_size)
def init_hidden(self):
# Before we've done anything, we dont have any hidden state.
# Refer to the Pytorch documentation to see exactly
# why they have this dimensionality.
# The axes semantics are (num_layers, minibatch_size, hidden_dim)
return (torch.zeros(1, 1, self.hidden_dim),
torch.zeros(1, 1, self.hidden_dim))
def forward(self, lstm_embedding, feature_embedding):
# lstm_out, self.hidden = self.lstm(lstm_embedding, self.hidden)
# pathEmbedding = self.hidden2Path(self.hidden[0].view(1, self.hidden_dim))
# binaryEmbedding = self.binary2Path(feature_embedding.view(1, self.binary_dim))
# combinedEmbedding = torch.cat((pathEmbedding, binaryEmbedding), 1)
# activate = F.relu(combinedEmbedding)
# tag_space = self.comb2tag(activate)
l_space = self.binary2L(feature_embedding.view(1, self.binary_dim))
l_space = F.relu(l_space)
tag_space = self.L2Tag(l_space)
tag_scores = F.softmax(tag_space, dim=1)
return tag_scores
# Train the model:
binaryFeats = np.load('./vectorToTrain.npy')
pathFeats = np.load('./pathsToTrain.npy')
testBinaryFeats = np.load('./Test/vectorToTest.npy')
testPathFeats = np.load('./Test/pathsToTest.npy')
EMBEDDING_DIM = 102
HIDDEN_DIM = 100
COMB_DIM = 400
BINARY_DIM = binaryFeats.shape[1] - 2
model = pathLSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, BINARY_DIM, COMB_DIM, 2)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.00001, momentum=0.9, weight_decay=0.001)
for epoch in range(200):
running_loss = 0.0
for k in range(binaryFeats.shape[0]):
# Step 1. Remember that Pytorch accumulates gradients.
# We need to clear them out before each instance
if k%1 == 0:
model.zero_grad()
optimizer.zero_grad()
# Also, we need to clear out the hidden state of the LSTM,
# detaching it from its history on the last instance.
model.hidden = model.init_hidden()
# Step 2. Get our inputs ready for the network, that is, turn them into
# Tensors of word indices.
# Step 3. Run our forward pass.
path_in = torch.tensor(pathFeats[k], dtype=torch.float32)
path_in = path_in.unsqueeze(1)
binary_in = torch.tensor(binaryFeats[k][:-2], dtype=torch.float32)
tag_scores = model(path_in, binary_in)
# Step 4. Compute the loss, gradients, and update the parameters by
# calling optimizer.step()
output = torch.tensor([binaryFeats[k][-1]], dtype=torch.long)
loss = loss_function(tag_scores, output)
loss.backward()
optimizer.step()
running_loss += loss.item()
if k % 2000 == 1999: # print every 2000 mini-batches
print('[%d, %5d] loss: %.3f' %
(epoch + 1, k + 1, running_loss / 2000))
running_loss = 0.0
correct = 0
with torch.no_grad():
for k in range(testBinaryFeats.shape[0]) :
path_in = torch.tensor(testPathFeats[k], dtype=torch.float32)
path_in = path_in.unsqueeze(1)
binary_in = torch.tensor(testBinaryFeats[k][:-2], dtype=torch.float32)
tag_scores = model(path_in, binary_in)
_, pred = torch.max(tag_scores, 1)
# print tag_scores, pred
actual = torch.tensor([testBinaryFeats[k][-1]], dtype=torch.long)
if pred == actual :
correct += 1
print "Accuracy: ", 100 * float(correct)/float(testBinaryFeats.shape[0])
# print model.binary2tag.weight