When I test this with:
# Pull the first 100 items from the loader, keeping only element 0 of each
# (presumably the input tensor of an (input, label) pair -- confirm against the dataset).
it = iter(train_loader)
imgs = []
for _ in range(100):
    imgs.append(next(it)[0])
print(len(imgs))
print(imgs[0].shape)

# Model, reconstruction loss, and optimizer.
mod = ConvAutoEncoder4()
crtrn = nn.MSELoss()
optimizer = optim.Adam(mod.parameters(), lr=1e-3)
ptmzr = optimizer  # both names refer to the same optimizer object
torch.autograd.set_detect_anomaly(False)

# Two consecutive single-item training steps on imgs[0] and imgs[1];
# the loss of each step is printed after the parameter update.
for img in imgs[:2]:
    ptmzr.zero_grad()
    res = mod(img)
    lss = crtrn(res, img)
    lss.backward()
    ptmzr.step()
    print(lss.item())
The output is:
100
torch.Size([1, 1, 28, 28, 28])
torch.Size([1, 4, 7, 7, 7])
tensor([[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.]], grad_fn=<UnbindBackward>)
tensor([[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.]], grad_fn=<UnbindBackward>)
tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0209, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
grad_fn=<UnbindBackward>)
tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0074, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
grad_fn=<UnbindBackward>)
tensor([[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.]], grad_fn=<UnbindBackward>)
tensor([[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.]], grad_fn=<UnbindBackward>)
tensor([[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.]], grad_fn=<UnbindBackward>)
torch.Size([1, 4, 7, 7, 7])
tensor([[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506]],
grad_fn=<UnbindBackward>)
tensor([[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506]],
grad_fn=<UnbindBackward>)
tensor([[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.5038, -0.4605, -0.4621, -0.4506],
[-0.4506, -0.4506, -0.4553, -0.4688, -0.4506, -0.4613, -0.4506],
[-0.4506, -0.4518, -0.5405, -0.4506, -0.4505, -0.4559, -0.4506],
[-0.4506, -0.4724, -0.4928, -0.4506, -0.4550, -0.4908, -0.4506],
[-0.4506, -0.4837, -0.4551, -0.4715, -0.4912, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506]],
grad_fn=<UnbindBackward>)
tensor([[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4650, -0.5366, -0.4510, -0.4506],
[-0.4506, -0.4506, -0.4545, -0.5428, -0.4506, -0.4499, -0.4506],
[-0.4506, -0.4520, -0.5970, -0.4506, -0.4505, -0.4499, -0.4506],
[-0.4506, -0.5225, -0.5748, -0.4506, -0.4538, -0.5510, -0.4506],
[-0.4506, -0.5077, -0.4728, -0.4642, -0.5557, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506]],
grad_fn=<UnbindBackward>)
tensor([[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4657, -0.5845, -0.4610, -0.4506],
[-0.4506, -0.4506, -0.4750, -0.5655, -0.4506, -0.4499, -0.4506],
[-0.4506, -0.4508, -0.5578, -0.4506, -0.4506, -0.4499, -0.4506],
[-0.4506, -0.4942, -0.6176, -0.4506, -0.4762, -0.5629, -0.4506],
[-0.4506, -0.4777, -0.5257, -0.5045, -0.5382, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506]],
grad_fn=<UnbindBackward>)
tensor([[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506]],
grad_fn=<UnbindBackward>)
tensor([[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506],
[-0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506, -0.4506]],
grad_fn=<UnbindBackward>)
0.03493741527199745
torch.Size([1, 4, 7, 7, 7])
tensor([[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.]], grad_fn=<UnbindBackward>)
tensor([[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.]], grad_fn=<UnbindBackward>)
tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0405, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, 0.0020, 0.0000, 0.0000, 0.0000]],
grad_fn=<UnbindBackward>)
tensor([[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.]], grad_fn=<UnbindBackward>)
tensor([[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.]], grad_fn=<UnbindBackward>)
tensor([[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.]], grad_fn=<UnbindBackward>)
tensor([[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0.]], grad_fn=<UnbindBackward>)
torch.Size([1, 4, 7, 7, 7])
tensor([[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan]], grad_fn=<UnbindBackward>)
tensor([[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan]], grad_fn=<UnbindBackward>)
tensor([[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan]], grad_fn=<UnbindBackward>)
tensor([[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan]], grad_fn=<UnbindBackward>)
tensor([[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan]], grad_fn=<UnbindBackward>)
tensor([[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan]], grad_fn=<UnbindBackward>)
tensor([[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan],
[nan, nan, nan, nan, nan, nan, nan]], grad_fn=<UnbindBackward>)
nan
The same thing happens when I use a DataLoader.