In numpy, when i have a 3D tensor X
with shape [A, B, C] and a 2D tensor Y
with shape [C, D], then np.dot(X, Y)
gives a 3D tensor with shape [A, B, D].
In PyTorch, i can do this as below. However, it seems that 2nd method
is numerically unstable. How can i fix this?
X = Variable(torch.randn(2, 30, 400))
Y = Variable(torch.randn(400, 400))
# 1st method
outs = []
for i in range(X.size(0)):
out = torch.mm(X[i], Y)
outs.append(out)
result1 = torch.stack(outs) # shape of (2, 3, 4)
# 2nd method
result2 = X.resize(2*30, 400).mm(Y)
result2 = result2.resize(2, 30, 400)
# 3rd method
result3 = torch.bmm(X, Y.unsqueeze(0).expand(X.size(0), *Y.size()))
assert np.allclose(result1.data.numpy(), result2.data.numpy()) # this causes an error
assert np.allclose(result1.data.numpy(), result3.data.numpy())
assert np.allclose(result2.data.numpy(), result3.data.numpy()) # this causes an error
assert np.allclose(result2.data.numpy(), result3.data.numpy(), 1e-2) # this doesn't cause an error