# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Module.to(device) moves every registered parameter and buffer, so an extra
# word_model.cuda() call afterwards is redundant.
word_model = word_model.to(device)

# Smoke test: push only the first batch through the model and print the result.
for x, y in dataset:
    # Use the same `device` as the model; x.cuda() would raise on a CPU-only
    # machine even though `device` already handles that fallback.
    x = x.to(device)
    output = word_model(x)
    print(output)
    break
And it shows:
RuntimeError: Expected object of device type cuda but got device type cpu for argument #2 'mat2' in call to _th_mm
I guess that my input and my model are not on the same device, but I'm not sure why.
Below is my model, but I'm not sure if this is where the error comes from:
class MojiNet(nn.Module):
    """Bidirectional-GRU classifier with an attention pooling step.

    The forward pass is: embedding -> dropout -> bidirectional GRU -> sum of
    the forward and backward halves -> attention-weighted sum over the
    sequence -> tanh -> linear layer -> dropout.

    Args:
        vocab_size: Number of embedding rows; only used when
            ``pretrained_embed`` is falsy.
        embedding_size: Embedding width, which is also the GRU input size.
        hidden_size: GRU hidden size per direction, and the input size of the
            final linear layer.
        pretrained_embed: When truthy, the embedding table is built from the
            module-level ``numpy_embed`` array instead of being initialised
            randomly.
        embed_dropout: Dropout probability applied to the embedded input.
        model_dropout: Dropout probability between GRU layers; only applied
            when ``num_layers`` is greater than 1.
        num_layers: Number of stacked GRU layers.
        class_num: Number of output classes.
    """

    def __init__(self, vocab_size, embedding_size, hidden_size, pretrained_embed,
                 embed_dropout, model_dropout, num_layers, class_num):
        super().__init__()
        self.hidden_size = hidden_size
        if pretrained_embed:
            # `numpy_embed` is a module-level array defined elsewhere.
            # from_pretrained() freezes the weights by default (freeze=True).
            # No .to(device) here: the embedding is a registered submodule, so
            # a later model.to(device) moves it together with everything else.
            # NOTE(review): presumably numpy_embed has embedding_size columns,
            # since the GRU expects that input width - confirm.
            self.embedding = nn.Embedding.from_pretrained(torch.FloatTensor(numpy_embed))
        else:
            self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.embed_dropout = nn.Dropout(embed_dropout)
        # nn.GRU only applies inter-layer dropout, so pass 0 for a single layer.
        self.sent_gru = nn.GRU(
            embedding_size,
            hidden_size,
            num_layers,
            dropout=(0 if num_layers == 1 else model_dropout),
            bidirectional=True,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, class_num)
        self.fc_dropout = nn.Dropout(0.5)
        # NOTE(review): `Attn` is a free function defined outside this class.
        # Any layers or tensors it creates are not registered on this module,
        # so model.to(device) will not move them. If Attn holds weights, turn
        # it into an nn.Module and register it here, e.g.
        # `self.attn = Attn(hidden_size)`.

    def forward(self, x):
        """Return class scores for a batch of token-index sequences.

        Args:
            x: Index tensor fed to the embedding layer; batch-first, because
                the GRU is built with ``batch_first=True``.

        Returns:
            The output of the final linear layer after dropout, with last
            dimension ``class_num``.
        """
        x = self.embedding(x)
        x = self.embed_dropout(x)
        # (batch, max_len, embedding_size)
        y, _ = self.sent_gru(x)
        # (batch, max_len, 2 * hidden_size) because the GRU is bidirectional
        # Sum the forward and backward halves. This must read self.hidden_size:
        # a bare `hidden_size` is not in scope inside forward().
        y = y[:, :, :self.hidden_size] + y[:, :, self.hidden_size:]
        # (batch, max_len, hidden_size)
        alpha = Attn(y, self.hidden_size)
        # expected (batch, 1, max_len) so that bmm below is well-formed
        r = alpha.bmm(y).squeeze(1)
        # (batch, hidden_size)
        h = torch.tanh(r)
        # (batch, hidden_size)
        output = self.fc(h)
        # (batch, class_num)
        output = self.fc_dropout(output)
        return output
I created the attention function Attn() outside the model, but I'm not sure if this is the problem, since the error message didn't mention it.
Okay, I just solved the problem myself. The cause was the Attn() function, which I wrote outside the model class as a plain def function: it is not moved to the GPU together with the model. So I created a new nn.Module class for Attn and added self.attn = Attn(hidden_size) to the model.