Hi, I’m using a GRU with a custom loss function. Everything seems to go right in the forward pass, but when I call loss.backward(), my program raises this error:
Traceback (most recent call last):
  File "/home/vietpd/Python/local/lib/python2.7/site-packages/grpc/_server.py", line 389, in _take_response_from_response_iterator
    return next(response_iterator), True
  File "app.py", line 344, in trainRNN
    }, save_path=RNN_PATH)
  File "/home/vietpd/Python/CafebizPersonalize/deeplearning/pytorch/SessionRnnLayer.py", line 168, in fit
    train_loss = _train(model, train_set, optimizer, epoch) # train over train_set
  File "/home/vietpd/Python/CafebizPersonalize/deeplearning/pytorch/SessionRnnLayer.py", line 217, in _train
    loss.backward()
  File "/home/vietpd/Python/local/lib/python2.7/site-packages/torch/autograd/variable.py", line 121, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/vietpd/Python/local/lib/python2.7/site-packages/torch/autograd/__init__.py", line 83, in backward
    variables, grad_variables, retain_graph, create_graph)
RuntimeError: The size of tensor a (4) must match the size of tensor b (204) at non-singleton dimension 1
This is my model:
class SessionRnnLayer(nn.Module):
    def __init__(self, embedding_size, n_items):
        super(SessionRnnLayer, self).__init__()
        self.embedding_size = embedding_size
        self.features = nn.GRU(embedding_size, embedding_size, 1, batch_first=True, dropout=0.5)
        self.embedding = nn.Embedding(n_items, embedding_size)

    def forward(self, x, y, hidden=None):
        lengths = [len(session) for session in x]
        inputs = [Variable(torch.LongTensor(session)) for session in x]
        inputs = [self.embedding(session) for session in inputs]
        inputs = pad_sequence(inputs, batch_first=True)
        inputs = pack_padded_sequence(inputs, lengths, batch_first=True)
        # convert to cuda tensors if cuda flag is true
        if USE_CUDA:
            inputs = inputs.cuda()
        features, hidden = self.features(inputs, hidden)
        if isinstance(features, PackedSequence):
            features, lengths = pad_packed_sequence(features, batch_first=True)
        # one loss per session, cutting each padded feature back to its real length
        loss = map(partial(self._get_loss, loss_func=bpr_max_loss),
                   [feature[:seq_len] for feature, seq_len in zip(features, lengths)], y)
        return torch.mean(torch.cat(loss, dim=0)), features, hidden

    def _get_loss(self, feature, label, loss_func=None):
        label = torch.LongTensor(label)
        # convert to cuda tensors if cuda flag is true
        if USE_CUDA:
            label = label.cuda()
        label = Variable(label)
        target = self.embedding(label)
        logits = torch.matmul(feature, target.t())  # (seq_len, len(label)) score matrix
        return loss_func(logits)

    def init_embed(self, embed):
        self.embedding.weight = nn.Parameter(torch.from_numpy(embed).float())
        self.embedding.weight.requires_grad = False
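For completeness, this is roughly how the model is driven. Below is a simplified sketch of my _train (epoch bookkeeping and logging omitted; train_set is assumed here to just yield (x, y) batches in the format described near the end of the post):

# Simplified sketch of _train (bookkeeping/logging omitted):
def _train(model, train_set, optimizer, epoch):
    model.train()
    total_loss = 0.0
    for x, y in train_set:                    # x, y: python lists, format shown below
        optimizer.zero_grad()
        loss, features, hidden = model(x, y)  # forward() returns the mean loss
        loss.backward()                       # the call that raises the error above
        optimizer.step()
        total_loss += loss.data[0]
    return total_loss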
And this is my loss function:
# BPR-max loss: for each row, compare the diagonal (positive) score against the other scores
def bpr_max_loss(logits, alpha=0.5):
    softmax_scores = softmax_neg(logits).t()
    logits_T = logits.t()
    diag = torch.diag(logits, 0)
    return torch.mean(-torch.log(torch.sum(F.sigmoid(diag - logits_T) * softmax_scores, 0) + 1e-24)
                      + alpha * torch.sum((logits_T ** 2) * softmax_scores, 0))


# softmax over each row, with the diagonal (positive item) masked out
def softmax_neg(X):
    hack_matrix = np.ones(X.size(), dtype=np.float32)
    np.fill_diagonal(hack_matrix, 0)
    hack_matrix = Variable(torch.from_numpy(hack_matrix).float())
    X = X * hack_matrix
    X_max = torch.max(X, 1)[0].unsqueeze(1).expand_as(X)
    X = (X - X_max) * hack_matrix
    return F.softmax(X, dim=1)
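For reference, bpr_max_loss is meant to compute the BPR-max loss. Reading the code above, with r_ij = logits[i, j] (row i is a step of the session, column j a candidate item), s_ij = softmax_neg(logits)[i, j], epsilon = 1e-24 and alpha as in the signature, it computes

$$
L = \frac{1}{n} \sum_{i=1}^{n} \left[ -\log\!\left( \sum_{j} s_{ij}\, \sigma(r_{ii} - r_{ij}) + \epsilon \right) + \alpha \sum_{j} s_{ij}\, r_{ij}^{2} \right]
$$

where r_ii, the diagonal entry of row i, plays the role of the positive item's score for that step.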
The data format (a toy example follows below):
- x: batch of lists of input ids. Ex: [ [1, 2, 3], [7, 8] ]
- y: batch of (list of target ids + N negative sample ids). Ex (N=1): [ [2, 3, 4, 5], [8, 9, 10] ]
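To make the y format concrete, here is a toy illustration matching the example above (the target and negative ids are made up, just to show the sizes):

# Toy illustration of how y relates to x (ids and negatives are made up):
x = [[1, 2, 3], [7, 8]]            # input ids per session
targets = [[2, 3, 4], [8, 9]]      # next-item target ids per session
N = 1
negatives = [[5], [10]]            # N sampled negative ids per session
y = [t + n for t, n in zip(targets, negatives)]
# y == [[2, 3, 4, 5], [8, 9, 10]], so len(y[k]) == len(x[k]) + N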
I found that when I set N=0 (meaning len(y[k]) == len(x[k]) for every session), my code runs smoothly. When I set N=200 it shows the error above; I notice that 204 = 4 + 200, which matches a session of length 4 with 200 extra negatives.
How can I debug and fix this? Thanks for reading.