Hi, I have been implementing a deep averaging network with GloVE pre-trained weights using PyTorch. I haven’t implemented batching in my code yet, but so far my prediction accuracy is only 50% even after I increase my epoch it is still the same. I have been researching a lot and found nothing. Could you please help me with it?
Model:
class NeuralSentimentClassifier(SentimentClassifier, nn.Module):
def init(
self,
word_embeddings,
inp_size,
hid_size,
out_size,
dp = 0.3
):
super(NeuralSentimentClassifier, self).init()
self.word_embeddings = word_embeddings
self.embedding = nn.Embedding.from_pretrained(
torch.from_numpy(self.word_embeddings.vectors),
padding_idx=0,
)
self.embedding.weight.requires_grad = False
self.model = nn.Sequential(
#nn.BatchNorm1d(inp_size),
#nn.Dropout(dp),
nn.Linear(inp_size, hid_size),
nn.ReLU(),
#nn.BatchNorm1d(emb_size),
nn.Linear(hid_size, out_size),
#nn.Dropout(dp)
)
self.log_softmax = nn.LogSoftmax(dim=0)
def get_mean_embedding(self, ex_words: List[str]):
word_idx = []
for word in ex_words:
idx = self.word_embeddings.word_indexer.index_of(word)
word_idx.append(idx if idx != -1 else 1)
emb = self.embedding(torch.LongTensor(word_idx))
return torch.mean(emb, dim=0)
def forward(self, ex_words: List[str]):
out = self.model(self.get_mean_embedding(ex_words))
return self.log_softmax(out)
def predict(self, ex_words: List[str]) -> int:
return torch.argmax(self.forward(ex_words))
Train DAN:
def train_deep_averaging_network(args, train_exs: List[SentimentExample], dev_exs: List[SentimentExample], word_embeddings: WordEmbeddings) → NeuralSentimentClassifier:
“”"
:param args: Command-line args so you can access them here
:param train_exs: training examples
:param dev_exs: development set, in case you wish to evaluate your model during training
:param word_embeddings: set of loaded word embeddings
:return: A trained NeuralSentimentClassifier model
“”"
num_classes = 2
num_epoch = 5
batch_size = 128
DAN = NeuralSentimentClassifier(
word_embeddings,
inp_size=word_embeddings.get_embedding_length(), # 50 or 300
hid_size=4,
out_size=num_classes,
)
DAN.double() # Numpy float64 problem
optimizer = optim.Adam(DAN.parameters(), lr=0.1)
loss_function = nn.NLLLoss()
random.shuffle(train_exs)
for epoch in range(num_epoch):
total_loss = 0.0
for se in train_exs:
#1# Prepare for inputs
x = se.words
y = torch.zeros(num_classes, dtype=torch.long)
# scatter will write the value of 1 into the position of y_onehot given by y
y.scatter_(0, torch.from_numpy(np.asarray(se.label, dtype=np.int64)), 1)
#2# Zero the weights
DAN.zero_grad()
#3# Forward the embedding
log_probs = DAN.forward(x)
#4# Calculate Loss
loss = loss_function(log_probs, y)
#5# Backprop and update loss
loss.backward()
optimizer.step()
total_loss += loss
print(epoch, "->", total_loss)
return DAN
My train_exs is a list of object with consists of {list[str]; words, list[int]:label}. This DAN is used to do sentiment analysis from given sentences.