I am fairly new to NLP. I was trying to learn word embeddings using word2vec on one of my favorite manga series. The problem I am facing is that the loss and accuracy values are stagnating after a few epochs. I need some guidance on how to increase the accuracy values and decrease the loss value.
I also extracted the learned embeddings and visualized them for the main characters, but the result looks random.
Below is a snapshot of the code and the loss and accuracy plots.
class NGramDoc2vec(nn.Module):
    """N-gram next-word predictor built on a word-embedding table.

    The context word embeddings are concatenated, passed through one hidden
    layer with ReLU, and projected to log-probabilities over the word
    vocabulary.

    NOTE: a document-embedding table (``doc_embedings``) is created but is not
    used by ``forward``; only the word columns of the input contribute to the
    prediction.
    """

    def __init__(self, word_vocab_size, doc_vocab_size, embedding_dim, context_size):
        """Create the embedding tables and the two linear layers.

        Args:
            word_vocab_size: number of rows in the word embedding table and
                number of output classes.
            doc_vocab_size: number of rows in the document embedding table.
            embedding_dim: width of every embedding vector.
            context_size: number of context words whose embeddings are
                concatenated as input to the first linear layer.
        """
        super().__init__()
        hidden_units = 128
        self.word_embeddings = nn.Embedding(word_vocab_size, embedding_dim)
        self.doc_embedings = nn.Embedding(doc_vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size * embedding_dim, hidden_units)
        self.linear2 = nn.Linear(hidden_units, word_vocab_size)

    def forward(self, inputs):
        """Return log-probabilities over the word vocabulary for each row.

        Every column of ``inputs`` except the last is looked up in the word
        embedding table (the last column is presumably the document id, which
        is ignored here -- confirm against the data pipeline).
        """
        context_ids = inputs[:, :-1]
        embedded = self.word_embeddings(context_ids)
        # concatenate the context embeddings of each row into one flat vector
        flattened = embedded.view(embedded.shape[0], -1)
        hidden = F.relu(self.linear1(flattened))
        scores = self.linear2(hidden)
        return F.log_softmax(scores, dim=1)

    def get_word_embedings(self, inputs):
        """Look up ``inputs`` in the word table and return them as a single row."""
        looked_up = self.word_embeddings(inputs)
        return looked_up.view((1, -1))
# get the current value of learning rate
def get_lr(opt):
    """Return the ``'lr'`` value of the first parameter group of ``opt``.

    Returns ``None`` when ``opt.param_groups`` is empty.
    """
    first_group = next(iter(opt.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']
#number of correct predictions per batch
def metrics_batch(output, target):
    """Count the rows of ``output`` whose arg-max along dim 1 equals the target.

    Returns the count as a plain Python number.
    """
    # predicted class per row, kept as a column so target can be reshaped to match
    predicted = output.argmax(dim=1, keepdim=True)
    hits = predicted.eq(target.view_as(predicted))
    return hits.sum().item()
# loss value per batch
def loss_batch(loss_func, output, target, opt=None):
    """Compute the loss and correct-prediction count for one batch.

    When ``opt`` is given, also runs one optimisation step (zero gradients,
    back-propagate, step). Returns ``(loss_value, corrects)`` where
    ``loss_value`` is the Python float of the loss tensor.
    """
    batch_loss = loss_func(output, target)

    # the metric is only bookkeeping, so it is computed without gradient tracking
    with torch.no_grad():
        n_correct = metrics_batch(output, target)

    # evaluation mode: no optimizer supplied, nothing to update
    if opt is None:
        return batch_loss.item(), n_correct

    opt.zero_grad()
    batch_loss.backward()
    opt.step()
    return batch_loss.item(), n_correct
# loss value and performance metric for the entire dataset
def loss_epoch(model, loss_func, dataset_dl, sanity_check=False, opt=None):
    """Run one pass over ``dataset_dl`` and return per-sample loss and metric.

    Each batch is moved to the module-level ``device``, fed through ``model``
    and handed to ``loss_batch``; passing ``opt`` therefore turns the pass
    into a training epoch.

    Args:
        model: callable applied to each input batch.
        loss_func: loss passed through to ``loss_batch``. The per-batch values
            are added up and divided by a sample count, so this assumes the
            loss is a batch *sum* (e.g. ``reduction='sum'``) -- TODO confirm
            for other loss functions.
        dataset_dl: iterable yielding ``(inputs, targets)`` batches.
        sanity_check: when truthy, stop after the first batch.
        opt: optional optimizer forwarded to ``loss_batch``.

    Returns:
        ``(loss, metric)``: running totals divided by the number of samples
        actually processed.

    Raises:
        ZeroDivisionError: if no samples were processed.
    """
    running_loss = 0.0
    running_metric = 0.0
    # Count the samples that really went through the model. Dividing by
    # len(dataset_dl.dataset) understates the averages whenever the loop
    # stops early (sanity_check) or the loader skips samples.
    samples_seen = 0

    for xb, yb in dataset_dl:
        # move batch to device
        xb = xb.to(device)
        yb = yb.to(device)

        output = model(xb)
        loss_b, metric_b = loss_batch(loss_func, output, yb, opt)

        running_loss += loss_b
        if metric_b is not None:
            running_metric += metric_b
        samples_seen += len(yb)

        # a sanity check only needs a single batch
        if sanity_check:
            break

    loss = running_loss / float(samples_seen)
    metric = running_metric / float(samples_seen)
    return loss, metric
def train_val(model, params):
    """Train ``model`` for several epochs, validating after each one.

    ``params`` is a dict providing ``num_epochs``, ``loss_func``,
    ``optimizer``, ``train_dl``, ``val_dl``, ``sanity_check``,
    ``lr_scheduler`` and ``path2weights``.

    After every epoch the validation loss is compared with the best one so
    far; on improvement the weights are kept in memory and saved to
    ``path2weights``. The scheduler is stepped with the validation loss, and
    whenever that changes the learning rate the best weights are reloaded.

    Returns:
        ``(model, loss_history, metric_history)`` -- the model with the best
        weights loaded, and two dicts with ``'train'`` / ``'val'`` lists
        holding one value per epoch.
    """
    # unpack the training configuration (same key order as it is documented)
    (num_epochs, loss_func, opt, train_dl, val_dl,
     sanity_check, lr_scheduler, path2weights) = (
        params[key] for key in (
            'num_epochs', 'loss_func', 'optimizer', 'train_dl', 'val_dl',
            'sanity_check', 'lr_scheduler', 'path2weights',
        )
    )

    # per-epoch loss and metric values
    loss_history = {'train': [], 'val': []}
    metric_history = {'train': [], 'val': []}

    # start from the initial weights as the "best" ones, with an unbeatable loss
    best_loss = float('inf')
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        lr_at_start = get_lr(opt)
        print('Epoch {}/{}, current_lr {}'.format(epoch, num_epochs-1, lr_at_start))

        # training pass (the optimizer is supplied, so weights are updated)
        model.train()
        train_loss, train_metric = loss_epoch(model, loss_func, train_dl, sanity_check, opt)
        loss_history['train'].append(train_loss)
        metric_history['train'].append(train_metric)

        # validation pass (no optimizer, no gradient tracking)
        model.eval()
        with torch.no_grad():
            val_loss, val_metric = loss_epoch(model, loss_func, val_dl, sanity_check)
        loss_history['val'].append(val_loss)
        metric_history['val'].append(val_metric)

        # keep and persist the weights whenever validation improves
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), path2weights)
            print('copied best model weights!')

        # let the scheduler react to the validation loss; if it changed the
        # learning rate, continue from the best weights seen so far
        lr_scheduler.step(val_loss)
        if get_lr(opt) != lr_at_start:
            print('Loading best model weights!')
            model.load_state_dict(best_model_wts)

        print('train losss: %.6f, dev loss: %.6f, accuracy: %.2f' \
              %(train_loss, val_loss, 100*val_metric))
        print('-'*10)

    # hand back the model with the best weights loaded
    model.load_state_dict(best_model_wts)
    return model, loss_history, metric_history
# Model hyper-parameters
EMBEDDING_DIM = 8
CONTEXT_SIZE = 4

# The loss is summed over each batch; loss_epoch adds the batch totals up and
# divides by a sample count to report a per-sample value.
# NOTE(review): with a summed loss the gradient magnitude grows with the batch
# size, so the effective step size depends on batch_size as well as on lr.
loss_func = nn.NLLLoss(reduction='sum')

model = NGramDoc2vec(len(word_to_index), len(doc_to_index), EMBEDDING_DIM, CONTEXT_SIZE)
# Move the model to its device *before* building the optimizer, as the
# torch.optim documentation recommends, so the optimizer is constructed from
# the parameters in their final location.
model = model.to(device)

opt = optim.SGD(model.parameters(), lr=0.001)
# halve the learning rate after 20 epochs without validation-loss improvement
lr_scheduler = ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=20, verbose=1)

params_train = {
    'num_epochs': 100,
    'optimizer' : opt,
    'loss_func' : loss_func,
    'train_dl' : train_loader,
    'val_dl' : val_loader,
    'sanity_check' : False,
    'lr_scheduler' : lr_scheduler,
    'path2weights' : './mdl/word2vec/weights.pt',
}

model, loss_hist, metric_hist = train_val(model, params_train)
I think I am missing something here, but I am unable to pinpoint what it is. It would be great if you could help me understand the problem.