Hello! I’m trying to train a model that estimates probability that given answer is appropriate for given query phrase. I took just 50 samples from my dataset (for example) and trained a test model. The problem I encountered is when I set batch_size for my Dataloader to 10 (or whatever >1) then loss won’t decrease:
LOSS: 0.9896 | ACC: 0.4830
LOSS: 0.9395 | ACC: 0.4845
LOSS: 0.8930 | ACC: 0.4781
LOSS: 0.7217 | ACC: 0.5270
LOSS: 0.8422 | ACC: 0.5115
LOSS: 0.8768 | ACC: 0.4669
LOSS: 0.7862 | ACC: 0.5239
LOSS: 0.7203 | ACC: 0.5401
LOSS: 0.8209 | ACC: 0.4989
LOSS: 0.9027 | ACC: 0.4701
LOSS: 0.6955 | ACC: 0.5293
LOSS: 0.7857 | ACC: 0.4915
LOSS: 0.8318 | ACC: 0.4931
LOSS: 0.8635 | ACC: 0.4902
LOSS: 0.7408 | ACC: 0.5089
But if I set batch_size to 1, everything is fine:
LOSS: 1.0307 | ACC: 0.4336
LOSS: 0.5335 | ACC: 0.6552
LOSS: 0.3271 | ACC: 0.7479
LOSS: 0.2570 | ACC: 0.7969
LOSS: 0.1890 | ACC: 0.8435
LOSS: 0.1637 | ACC: 0.8582
LOSS: 0.1309 | ACC: 0.8830
LOSS: 0.1199 | ACC: 0.8923
LOSS: 0.0974 | ACC: 0.9124
LOSS: 0.0906 | ACC: 0.9171
LOSS: 0.0848 | ACC: 0.9216
LOSS: 0.0747 | ACC: 0.9300
LOSS: 0.0683 | ACC: 0.9360
LOSS: 0.0625 | ACC: 0.9411
LOSS: 0.0576 | ACC: 0.9542
(Every line is the result of one epoch)
My model and train pieces of code:
class Encoder(nn.Module):
def __init__(self, embedding_size, hidden_size, n_layers):
super(Encoder, self).__init__()
self.layers = n_layers
self.hidden_size = hidden_size
self.encoder = nn.GRU(embedding_size, hidden_size, n_layers, batch_first=True, bidirectional=True)
for name, param in self.named_parameters():
if len(param.size()) > 1:
weight_init.kaiming_normal_(param)
def forward(self, phrase_emb):
# outputs - B x T x H*2
# hidden - B x L*2 x H
outputs, hidden = self.encoder(phrase_emb.to(device), self.get_init_vector(phrase_emb.size(0)).to(device))
return hidden
def get_init_vector(self, batch_size):
return torch.zeros(self.layers*2, batch_size, self.hidden_size).to(device)
class RankModel(nn.Module):
def __init__(self, encoder, encoder2, n_layers, encoder_hidden):
super(RankModel, self).__init__()
self.encoder = encoder
self.encoder2 = encoder2
self.matcher = nn.Linear(encoder_hidden*2*n_layers, encoder_hidden*2*n_layers)
self.sigmoid = nn.Sigmoid()
self.hidden = encoder_hidden
for name, param in self.named_parameters():
if len(param.size()) > 1:
weight_init.kaiming_normal_(param)
def forward(self, phrase, response):
batch_size = phrase.size(0)
x_emb = self.encoder(phrase).view(batch_size, 1, self.hidden*2)
y_emb = self.encoder2(response).view(batch_size, self.hidden*2, 1)
x_match = self.matcher(x_emb) # (B x 1 x H*2)
x_y_match = torch.bmm(x_match, y_emb)
probability = self.sigmoid(x_y_match.squeeze(2).squeeze(1))
return probability
hidden_size = 1000
n_layers = 1
encoder = Encoder(EMBEDDING_SIZE, hidden_size, n_layers)
encoder2 = Encoder(EMBEDDING_SIZE, hidden_size, n_layers)
rank_model = RankModel(encoder, encoder2, n_layers, hidden_size)
learning_rate = 0.001
optimizer = optim.SGD(rank_model.parameters(), lr=learning_rate)
def run_iteration(self, model, batched_sample):
targets = sample['probability'].squeeze(1)
predicted = model.forward(sample['vec-in1'], sample['vec-in2'])
loss = criterion(predictions, targets)
loss.backward()
for i_batch, batched_data in enumerate(dataloader):
optimizer.zero_grad()
run_iteration(model, batched_data)
optimizer.step()
Almost the same training code was used to train another model, so I’m pretty sure that the training code is ok. It seems that the problem in matrix/vector operations in the RankModel. I tried to change some .view() parameters but without success. Maybe someone could help?