My code works fine on Colaboratory with CUDA.
Now I am trying to run it in my local environment.
My environment is:
CUDA: 10.1
cuDNN: 7
and today I installed PyTorch following the pip3 instructions.
After setting it up, I get this error:
RuntimeError: expected device cpu and dtype Float but got device cuda:0 and dtype Float
Actually, my code sets device(“cuda”); I want it to run on the NVIDIA GPU.
I cannot understand why the script tries to run on the CPU, while Colab gives no error (indeed run on cpu).
In addition, I replaced the operators with torch’s operators, but I still get the error.
Could you tell me the reason and the solution?
Traceback (most recent call last):
File "QNet.py", line 203, in <module>
out = model()
File "/home/syouyu/.local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 547, in __call__
result = self.forward(*input, **kwargs)
File "QNet.py", line 164, in forward
fw_prop(self)
File "QNet.py", line 70, in fw_prop
q_layer(self)
File "QNet.py", line 60, in q_layer
q_sel(self)
File "QNet.py", line 53, in q_sel
self.sel[index] = torch.sigmoid(torch.add(torch.mul(self.w_x[index], self.x[index]), torch.mul(self.w_h[index], self.h[index])))
RuntimeError: expected device cpu and dtype Float but got device cuda:0 and dtype Float
The tensors are defined as:
class Model(nn.Module):
    """Model state as first posted: every tensor lives in a plain Python list.

    Five lists are created, each holding NUM_HIDDEN vectors of length
    NUM_INPUT: ``w_x`` and ``w_h`` (random, ``requires_grad=True``) and
    ``sel``, ``x`` and ``h`` (zeros).
    """

    def __init__(self):
        def trainable():
            # Random vectors that autograd tracks.
            return [torch.randn((NUM_INPUT), requires_grad=True) for _ in range(NUM_HIDDEN)]

        def zeroed():
            # Zero-initialised vectors.
            return [torch.zeros(NUM_INPUT) for _ in range(NUM_HIDDEN)]

        self.w_x = trainable()
        self.w_h = trainable()
        self.sel = zeroed()
        self.x = zeroed()
        self.h = zeroed()
When you convert your model with model.cuda(), the Tensors you hold as plain attributes will not be converted: only child Modules will be converted automatically.
You will have to manually specify the device of your tensors in the creation:
class Model(nn.Module):
    """Variant that creates every tensor directly on the requested device.

    Args:
        device: passed as ``device=`` to each ``torch.randn`` / ``torch.zeros``
            call, so all five lists are allocated there.
    """

    def __init__(self, device):
        super().__init__()

        def trainable():
            # Random vectors that autograd tracks, allocated on `device`.
            return [
                torch.randn((NUM_INPUT), requires_grad=True, device=device)
                for _ in range(NUM_HIDDEN)
            ]

        def zeroed():
            # Zero-initialised vectors, allocated on `device`.
            return [torch.zeros(NUM_INPUT, device=device) for _ in range(NUM_HIDDEN)]

        self.w_x = trainable()
        self.w_h = trainable()
        self.sel = zeroed()
        self.x = zeroed()
        self.h = zeroed()
Since you are initializing the tensors with requires_grad=True, you should wrap them into nn.Parameter, so that they will be properly registered in the state_dict and will be automatically pushed to the device, if you call model.to(device).
Also, since you are storing these parameters in a list, use ParameterList, as a plain Python list won’t register the parameters properly.
This should work:
class Model(nn.Module):
    """Variant that wraps all five tensor groups in ``nn.ParameterList``.

    Each list holds NUM_HIDDEN ``nn.Parameter`` vectors of length NUM_INPUT;
    ``w_x`` / ``w_h`` start random, ``sel`` / ``x`` / ``h`` start at zero.
    """

    def __init__(self):
        super(Model, self).__init__()

        def parameter_list(init):
            # `init` is torch.randn or torch.zeros; it is called once per entry.
            return nn.ParameterList(
                [nn.Parameter(init(NUM_INPUT)) for _ in range(NUM_HIDDEN)]
            )

        self.w_x = parameter_list(torch.randn)
        self.w_h = parameter_list(torch.randn)
        self.sel = parameter_list(torch.zeros)
        self.x = parameter_list(torch.zeros)
        self.h = parameter_list(torch.zeros)
# Build the model and move everything it has registered to the GPU.
model = Model()
model.cuda()
# NOTE(review): parameters() yields the parameters lazily, so this presumably
# prints the iterator object rather than the tensors — wrap it in list(...)
# to inspect the values.
print(model.parameters())
I updated my code following your advice, and I got another error:
File "QNet.py", line 62, in q_sel
self.sel[index] = torch.sigmoid(torch.add(torch.mul(self.w_x[index], self.x[index]), torch.mul(self.w_h[index], self.h[index])))
RuntimeError: expected device cuda:0 and dtype Float but got device cpu and dtype Float
Does this mean that the setup on my local PC is incorrect?
#!/usr/bin/env python
# coding: utf-8
from torchvision import datasets, transforms, models
import torch
from torch import nn, optim, utils, device as device_, cuda
import numpy as np
# Architecture Hyper-Parameters
# Length of every weight / state vector created with torch.randn / torch.zeros.
NUM_INPUT = 28
# Number of inner steps iterated per batch in the training loop.
TIME_STEPS = 28
# Length passed to one_hot_embedding() for the label vector.
NUM_CLASS = 10
# Number of vectors kept in each of w_x, w_h, sel, x and h.
NUM_HIDDEN = 28
# batch_size handed to the DataLoader.
BATCH_SIZE = 1024
# Number of passes of the outer training loop.
EPOCH = 64
def one_hot_embedding(y, length):
    """Return a float vector of size ``length`` that is 1.0 at index ``y``.

    Args:
        y: index to switch on (anything accepted by tensor indexing).
        length: number of entries in the returned vector.

    Returns:
        A 1-D ``torch`` tensor of zeros with ``out[y]`` set to 1.0.

    NOTE(review): the training loop passes a whole batch of labels; with a
    1-D index tensor every listed position is set, which yields one multi-hot
    vector rather than one row per sample — confirm that this is intended.
    """
    out = torch.zeros(length)
    out[y] = 1.0
    # The original fell off the end of the function, so callers got None.
    return out
def q_sel(self):
    """Recompute every gate selector ``self.sel[i]``.

    For each i in ``range(NUM_HIDDEN)`` this stores
    ``sigmoid(w_x[i] * x[i] + w_h[i] * h[i])`` into ``self.sel[i]``.
    """
    for unit in range(NUM_HIDDEN):
        # The four operands are combined element-wise; the reported
        # RuntimeError is raised here when they sit on different devices.
        input_term = self.w_x[unit] * self.x[unit]
        recurrent_term = self.w_h[unit] * self.h[unit]
        self.sel[unit] = torch.sigmoid(input_term + recurrent_term)
def q_layer(self):
    """Run one layer step; currently this only refreshes the selectors via q_sel."""
    q_sel(self)
def fw_prop(self):
    """Forward propagation entry point; delegates to q_layer and returns nothing."""
    q_layer(self)
# MNIST training split, stored under ~/mnist (downloaded when missing) and
# converted to tensors on access.
dataset_train = datasets.MNIST(
    '~/mnist',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Shuffled mini-batches of BATCH_SIZE samples, loaded by 4 worker processes.
dataloader_train = utils.data.DataLoader(
    dataset_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)
class Model(nn.Module):
    """Model from the full script: registered weights plus plain-list state.

    ``w_x`` / ``w_h`` are ``nn.ParameterList`` objects, so the module knows
    about them. ``sel``, ``x`` and ``h`` are ordinary Python lists of tensors;
    as explained in the thread, ``model.cuda()`` does not convert such
    tensors, so they stay where they were created.
    """

    def __init__(self):
        super().__init__()

        def random_parameters():
            return nn.ParameterList(
                [nn.Parameter(torch.randn(NUM_INPUT)) for _ in range(NUM_HIDDEN)]
            )

        def zero_vectors():
            return [torch.zeros(NUM_INPUT) for _ in range(NUM_HIDDEN)]

        # Gate-Weight
        self.w_x = random_parameters()
        self.w_h = random_parameters()
        # Gate-Selector
        self.sel = zero_vectors()
        # Input Vector
        self.x = zero_vectors()
        # Output Vector
        self.h = zero_vectors()

    def forward(self):
        """Run fw_prop on this module; nothing is returned."""
        fw_prop(self)
# Build the model and move it to the GPU. As explained in the thread, .cuda()
# only converts what the module has registered (here w_x and w_h).
model = Model()
model.cuda()
print(model.parameters())
# Report any registered parameter that is not on a CUDA device.
for name, param in model.named_parameters():
if param.device.type != 'cuda':
print('param {}, not on GPU'.format(name))
# NOTE(review): indentation was lost in this paste, so the exact nesting of
# the statements below cannot be confirmed from here.
for epoch in range(EPOCH):
for x, t in dataloader_train:
y = one_hot_embedding(t, NUM_CLASS)
for time in range(TIME_STEPS):
# x comes straight from the DataLoader and torch.zeros() is given no device,
# so both assignments presumably put CPU tensors into the model — which would
# match the "got device cpu" error reported for q_sel; TODO confirm.
model.x[0] = x[0][0][time]
model.h[0] = torch.zeros(NUM_INPUT)
model.zero_grad()
out = model()
This produces the following error:
File "test.py", line 24, in q_sel
self.sel[index] = torch.sigmoid(self.w_x[index] * self.x[index] + self.w_h[index] * self.h[index])
RuntimeError: expected device cuda:0 and dtype Float but got device cpu and dtype Float
self.sel, self.x and self.h are not parameters.
So I think wrapping them as parameters does not allow assigning new values to them.
It produces this error:
in register_parameter
.format(torch.typename(param), name))
TypeError: cannot assign 'torch.FloatTensor' object to parameter '0' (torch.nn.Parameter or None required)
I tried the following:
self.sel = [torch.zeros(NUM_INPUT, device='cuda') for _ in range(NUM_HIDDEN)]
self.x = [torch.zeros(NUM_INPUT, device='cuda') for _ in range(NUM_HIDDEN)]
self.h = [torch.zeros(NUM_INPUT, device='cuda') for _ in range(NUM_HIDDEN)]
Thanks for the information. I’ve missed that these tensors should not require gradients.
In that case, you could register them as buffers using:
class Model(nn.Module):
    """Model with trainable gate weights and non-trainable state buffers.

    ``w_x`` / ``w_h`` are ``nn.ParameterList`` objects holding NUM_HIDDEN
    random vectors of length NUM_INPUT. ``sel``, ``x`` and ``h`` are
    registered as buffers of shape (NUM_HIDDEN, NUM_INPUT): they need no
    gradients but, being registered, follow the module when it is moved
    with ``model.to(device)`` / ``model.cuda()``.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.w_x = nn.ParameterList(
            [nn.Parameter(torch.randn(NUM_INPUT)) for _ in range(NUM_HIDDEN)]
        )
        self.w_h = nn.ParameterList(
            [nn.Parameter(torch.randn(NUM_INPUT)) for _ in range(NUM_HIDDEN)]
        )
        # Allocate each buffer in one call; this gives the same
        # (NUM_HIDDEN, NUM_INPUT) zero tensor as stacking NUM_HIDDEN vectors.
        self.register_buffer('sel', torch.zeros(NUM_HIDDEN, NUM_INPUT))
        self.register_buffer('x', torch.zeros(NUM_HIDDEN, NUM_INPUT))
        self.register_buffer('h', torch.zeros(NUM_HIDDEN, NUM_INPUT))