RuntimeError: Tensor for argument #3 'mat2' is on CPU, but expected it to be on GPU (while checking arguments for addmm)

I realize this error means that something has not been moved to the GPU, but I don't understand which tensor the error is referring to.
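For context on the message itself: torch.addmm(input, mat1, mat2) computes input + mat1 @ mat2 (nn.Linear, among others, uses it under the hood), so 'mat2' is the right-hand matrix of some matrix multiply in the forward pass. A minimal repro of the same class of error, with made-up shapes (the exact wording varies across PyTorch versions):

import torch

b = torch.zeros(3, device='cuda:0')      # the additive 'input' term
m1 = torch.randn(2, 4, device='cuda:0')  # left matrix, on the GPU
m2 = torch.randn(4, 3)                   # right matrix, still on the CPU
torch.addmm(b, m1, m2)                   # RuntimeError: device mismatch on 'mat2'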

The network:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.nHidden = 30
        self.nFilter = 20
        self.conv1 = nn.Conv2d(1, self.nFilter, (5, 5), stride=(1, 1), padding=(2, 2))
        self.birnn = nn.GRU(self.nFilter * 129, self.nHidden, 1,
                            bidirectional=True, batch_first=True)

        # attention layer:
        self.attentionSize = 64
        self.attLin = nn.Linear(2 * self.nHidden, self.attentionSize)
        self.uOmega = torch.rand(self.attentionSize)  # plain tensor attribute (see the fix at the end)

        # output:
        self.fc = nn.Linear(2 * self.nHidden, 5)

    def forward(self, x):
        # filtering:
        x = F.relu(self.conv1(x))

        # biGRU:
        x = x.permute((0, 2, 1, 3))
        x = x.reshape((-1, 29, 20 * 129))
        x, hn = self.birnn(x)

        # attention:
        v = torch.tanh(self.attLin(x))
        vu = torch.matmul(v, self.uOmega)
        alpha = F.softmax(vu, dim=1)
        # print(x.permute((0, 2, 1)).size(), torch.unsqueeze(alpha, 2).size())
        abar = torch.matmul(x.permute((0, 2, 1)), torch.unsqueeze(alpha, 2))
        abar = abar.permute((0, 2, 1))

        # out:
        x = F.relu(self.fc(abar))
        # x = F.softmax(x, 1)

        return torch.squeeze(x)

net = Net()

The calling code:

trainSampler = torch.utils.data.DataLoader(trainDataset, batch_size=128,
                                           shuffle=True, drop_last=True)
valSampler = torch.utils.data.DataLoader(valDataset, batch_size=128,
                                         shuffle=False, drop_last=False)

cuda = torch.device('cuda:0')
net.to(device=cuda)
for p in net.parameters():
    print(p.device)

loss_fn = nn.NLLLoss()

learning_rate = 1e-4
optimizer = torch.optim.Adam(net.parameters(), learning_rate)

for iEpoch in range(20):
    net.train()
    for xtemp, ytemp in trainSampler:
        xbatch = xtemp.to(device=cuda)
        ybatch = ytemp.to(cuda)

        y_pred = net(xbatch)
        loss = loss_fn(y_pred, ybatch)

        # Zero the gradients before running the backward pass.
        net.zero_grad()

        # Backward pass: compute the gradient of the loss with respect to all
        # learnable parameters of the model. Internally, the parameters of each
        # Module are stored in tensors with requires_grad=True, so this call
        # computes gradients for all learnable parameters in the model.
        loss.backward()
        optimizer.step()
    

Some diagnostic output:

print(torch.cuda.current_device())
0

print(torch.cuda.device(0))
<torch.cuda.device object at 0x000001518001CC70>

print(torch.cuda.device_count())
1

print(torch.cuda.get_device_name(0))
GeForce GTX 1060

print(torch.cuda.is_available())
True


for p in net.parameters():
    print(p.device)
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0

print(xbatch.device)
device(type='cuda', index=0)
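The loop above only covers registered parameters. A sketch of a hypothetical helper that also checks buffers and scans each submodule for plain tensor attributes, which net.to() does not move:

import torch
import torch.nn as nn

def find_stray_tensors(module, device):
    # Registered parameters and buffers (these are moved by .to()):
    for name, p in module.named_parameters():
        if p.device != device:
            print('parameter', name, 'is on', p.device)
    for name, b in module.named_buffers():
        if b.device != device:
            print('buffer', name, 'is on', b.device)
    # Plain tensor attributes are invisible to .to() and to parameters():
    for mod_name, mod in module.named_modules():
        for attr, value in vars(mod).items():
            if isinstance(value, torch.Tensor) and value.device != device:
                print('unregistered tensor', mod_name + '.' + attr, 'is on', value.device)

find_stray_tensors(net, torch.device('cuda:0'))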

I assume that the tensors created in forward() will also be on the GPU, since they are computed entirely from tensors that are already there? The error is coming from:

File “D:\anaconda3\lib\site-packages\torch\nn\modules\module.py”, line 727, in _call_impl
result = self.forward(*input, **kwargs)

I hope someone can spot what it is I need to move. The optimizer, perhaps? If so, how do I do that?

I think I found the error myself:

‘uOmega’ was not added correctly to the network, so it was not moved when I called net.to(cuda).

The proper initialization is:

        self.uOmega = nn.Parameter(torch.randn(self.attentionSize))
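As an nn.Parameter, uOmega is registered with the module, so it appears in net.parameters() (and therefore reaches the optimizer) and is moved by net.to(cuda). A minimal sketch, including register_buffer as the alternative for a tensor that should follow the module between devices but not be trained:

import torch
import torch.nn as nn

class Demo(nn.Module):
    def __init__(self):
        super().__init__()
        self.uOmega = nn.Parameter(torch.randn(64))    # registered: trained, and moved by .to()
        self.register_buffer('scale', torch.ones(64))  # moved by .to(), but receives no gradient

demo = Demo().to('cuda:0')
print(next(demo.parameters()).device)  # cuda:0
print(demo.scale.device)               # cuda:0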