Hi,
I have a module that has multiple submodules as attributes. I have wrapped the submodules in nn.ModuleList and nn.ModuleDict. When I call .cuda() on the parent module, the buffers are copied over to the GPU, but the submodule attributes that were originally the same objects as the buffers are not reassigned to the new buffers on the GPU. What is the best way to fix this? Below is a snippet of toy code:
import torch
class InnerToyModule(torch.nn.Module):
def __init__(self, x):
super(InnerToyModule, self).__init__()
self.ten = torch.tensor(x)
self.register_buffer("ten_" + str(x), self.ten)
class ToyModule(torch.nn.Module):
    """Parent module wrapping two InnerToyModule children in a ModuleList."""

    def __init__(self):
        super(ToyModule, self).__init__()
        children = [InnerToyModule(value) for value in (1, 2)]
        self.inner = torch.nn.ModuleList(children)
if __name__ == '__main__':
    model = ToyModule()
    before = dict(model.named_buffers())
    print(before)
    # Before the move, the attribute and the registered buffer are one object.
    print(model.inner[0].ten is before["inner.0.ten_1"])
    model.cuda()
    after = dict(model.named_buffers())
    # After .cuda() the registered buffer has been replaced with a GPU tensor.
    print(model.inner[0].ten is after["inner.0.ten_1"])
    print(after["inner.0.ten_1"])
output:
{'inner.0.ten_1': tensor(1), 'inner.1.ten_2': tensor(2)}
True
False
tensor(1, device='cuda:0')