Hi, can someone tell me what I need to do to back-propagate when I'm working with stacked tensors? A simple matrix multiplication of two 3 x 3 matrices back-propagates fine when I work backward from the loss. But in the same setup, if I split one of the matrices into three individual 3 x 1 matrices, stack them, and then do the multiplication, it doesn't seem to work. (For comparison, I've put the plain 3 x 3 version that does work at the bottom of this post.)

```
import torch
device = torch.device('cpu')
x = torch.tensor([[1,2,3],[4,5,6],[7,8,9]], dtype=torch.float)
y = torch.tensor([[ 2.8191,  4.3610,  3.7487],
                  [ 3.8279, 12.9571, 10.9117],
                  [ 4.8367, 21.5532, 18.0747]])
w1 = torch.randn(3, 1, device=device, requires_grad=True)
w2 = torch.randn(3, 1, device=device, requires_grad=True)
w3 = torch.randn(3, 1, device=device, requires_grad=True)
# stack the three 3 x 1 vectors into one weight matrix
w = torch.stack((w1, w2, w3))   # shape (3, 3, 1)
w.squeeze_(-1)                  # shape (3, 3)
print(w)
learning_rate = 1e-5
for t in range(20000):
    y_pred = x.mm(w)
    loss = (y_pred - y).pow(2).sum()
    if t % 5000 == 0:
        print(t, loss.item())
    loss.backward()
    with torch.no_grad():
        # print("Grad=", w1.grad)
        w1 -= learning_rate * w1.grad
        # print(w1, learning_rate * w1.grad)
        w2 -= learning_rate * w2.grad
        w3 -= learning_rate * w3.grad
        # Manually zero the gradients after running the backward pass
        w1.grad.zero_()
        w2.grad.zero_()
        w3.grad.zero_()
print(y_pred)
```
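For reference, here is a minimal sketch of the plain 3 x 3 version I mentioned, which converges fine for me (same x and y as above, just a single weight matrix instead of three stacked vectors):

```
import torch

x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float)
y = torch.tensor([[ 2.8191,  4.3610,  3.7487],
                  [ 3.8279, 12.9571, 10.9117],
                  [ 4.8367, 21.5532, 18.0747]])

# one 3 x 3 weight matrix as a single leaf tensor
w = torch.randn(3, 3, requires_grad=True)

learning_rate = 1e-5
for t in range(20000):
    y_pred = x.mm(w)
    loss = (y_pred - y).pow(2).sum()
    if t % 5000 == 0:
        print(t, loss.item())
    loss.backward()
    with torch.no_grad():
        w -= learning_rate * w.grad
        w.grad.zero_()
print(y_pred)
```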