Tensor on different device

chagelo · November 30, 2022, 10:06am

    # Per-sample setup: fetch the image and its output path, then choose the
    # column indices that will act as "motion events".
    for i, data in enumerate(dataset):
        # Move the image to the target device and drop all size-1 dimensions.
        img = torch.squeeze(data['image'].to(device))
        
        save_path = data['save_path']
        
        # The two-value unpacking only works if img is 2-D after the squeeze.
        h, w = img.shape
        
        # Candidate indices: 0 .. int(w/2 - 10) - 1 and int(w/2 + 10) .. w - 1,
        # i.e. every column except a band around the centre.
        num = list(range(0,int(w / 2 - 10))) + list(range(int(w / 2 + 10), w)) 
        # motion_events = np.random.randint(30, high=150)
        # The random count above is disabled; a single motion event is used.
        motion_events = 1
        # Draw `motion_events` distinct indices from the candidates.
        # NOTE(review): k_space_lines is not referenced again in the snippet shown.
        k_space_lines = random.sample(num, motion_events)

        class AffineTransform(torch.nn.Module):
            """Learnable image plus n learnable 2-D rotation+translation transforms.

            Parameters held by the module:
              I      -- learnable image of shape (1, 1, h, w)
              X, Y   -- per-transform translation terms, shape (n,)
              THETA  -- per-transform rotation angle, shape (n,)

            `tm` is the stack of 2x3 affine matrices
            [[cos t, -sin t, x], [sin t, cos t, y]] reshaped to (n, 1, 2, 3).
            It is a plain tensor attribute, not a Parameter or a buffer.
            """
            def __init__(self, h, w, n) -> None:
                super().__init__()
                self.n = n
                self.I = torch.nn.Parameter(torch.randn((1, 1, h, w), dtype=torch.float))
                self.X = torch.nn.Parameter(torch.randn(n, dtype=torch.float))
                self.Y = torch.nn.Parameter(torch.randn(n, dtype=torch.float))
                self.THETA = torch.nn.Parameter(torch.randn(n, dtype=torch.float))
                # NOTE(review): tm is derived from the parameters here, at
                # construction time, i.e. before the caller's `.to(device)` runs.
                # As a plain attribute it is presumably not moved by Module.to(),
                # and its autograd graph refers to the parameters as they were on
                # the CPU. This looks like the source of the cuda/cpu mismatch
                # reported below; building tm inside forward() avoids it.
                self.tm = torch.stack([torch.cos(self.THETA), torch.sin(-self.THETA), self.X, torch.sin(self.THETA), torch.cos(self.THETA), self.Y], dim=1).reshape(n, 1, 2, 3)
            def forward(self, i):
                """Warp the learnable image I with the i-th affine matrix.

                `i` indexes the first dimension of tm. Relies on the `device`
                variable from the enclosing scope.
                """
                # Workaround: move the cached matrices to the device on every call.
                self.tm = self.tm.to(device)
                # Sampling grid with the same size as I, then resample I through it.
                grid = F.affine_grid(self.tm[i], torch.Size(self.I.size()), align_corners=False)
                return F.grid_sample(self.I, grid, align_corners=False).to(device)
                # return self.I
            def update(self):
                """Rebuild tm from the current values of THETA, X and Y."""
                self.tm = torch.stack([torch.cos(self.THETA), torch.sin(-self.THETA), self.X, torch.sin(self.THETA), torch.cos(self.THETA), self.Y], dim=1).reshape(self.n, 1, 2, 3)

        # Build the module for this image and move it to the target device.
        stn = AffineTransform(h, w, motion_events).to(device)
        # Adam over everything returned by stn.parameters() (I, X, Y, THETA).
        optimz = torch.optim.Adam(stn.parameters(), lr=0.001)

        print("processing: {}".format(save_path))
        # 10000 Adam steps minimising the L1 distance between the target image
        # and the warped learnable image.
        for epoch in range(10000):
            
            # Warp the learnable image with transform index 0.
            corrupted_img = stn(0)

            # NOTE(review): the loss module is loop-invariant but is re-created
            # on every iteration.
            lsf = torch.nn.L1Loss().to(device)
            optimz.zero_grad()

            # [0, 0] drops the two leading size-1 dims of the (1, 1, h, w) output
            # so it lines up with the 2-D target image.
            diff = lsf(img, corrupted_img[0, 0])
            diff.backward()
            optimz.step()
            # Rebuild the cached affine matrices from the just-updated parameters,
            # which also gives the next iteration a fresh autograd graph for tm.
            stn.update()

While debugging I found that all tensors are on CUDA, but it still throws a RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:6 and cpu! I don’t know why.

srishti-git1110 · November 30, 2022, 10:20am

If you’ve found that all tensors are on the correct device, please also make sure that the model instance is on the correct device.

chagelo · November 30, 2022, 10:46am

stn = AffineTransform(h, w, motion_events).to(device), obviously.

vdw · November 30, 2022, 11:14am

I don’t think that wrapping these tensors in torch.nn.Parameter(...) is sufficient for them to be moved to the device when you call .to(device) on the model.

You can wrap all parameters in an nn.ModuleList or something, or change your code to something like:

# Suggested variant: pass the target device into the constructor as well.
class AffineTransform(torch.nn.Module):
            def __init__(self, h, w, n, device='cpu') -> None:
                super().__init__()
                self.n = n
                # NOTE(review): `.to(device)` is applied to the Parameter here, so
                # when it has to copy to another device the result is presumably
                # a plain (non-leaf) tensor rather than a registered Parameter --
                # confirm that stn.parameters() still yields it before relying
                # on this pattern.
                self.I = torch.nn.Parameter(torch.randn((1, 1, h, w), dtype=torch.float)).to(device)
                # Remaining attributes elided in the original post.
                ...

# The device is passed to the constructor and the module is also moved afterwards.
stn = AffineTransform(h, w, motion_events, device=device).to(device)

Maybe there are more elegant solutions but it should work as I just had this issue myself.

chagelo · November 30, 2022, 11:29am

So, I tried a lot more and finally solved it. What I did was move tm = torch.stack([torch.cos(self.THETA), torch.sin(-self.THETA), self.X, torch.sin(self.THETA), torch.cos(self.THETA), self.Y], dim=1).reshape(n, 1, 2, 3) into forward, but I don’t know why that works.