RuntimeError due to tensors being on different devices

device = "cuda" if torch.cuda.is_available() else "cpu"
class LSTM(nn.Module):
    def __init__(self, n_hidden = 128,):
        super(LSTM, self).__init__()
        self.n_hidden = n_hidden
        #self.linearIn = nn.Linear(6, 128)
        self.lstm1 = nn.LSTMCell(1, self.n_hidden,)
        self.lstm2 = nn.LSTMCell(self.n_hidden, self.n_hidden)
        self.linear = nn.Linear(self.n_hidden, 1)
    
    def forward(self, input, future=0):
        outputs = []
        h_t = torch.zeros(input.size(0), 128, dtype=torch.float32)
        c_t = torch.zeros(input.size(0), 128, dtype=torch.float32)
        h_t2 = torch.zeros(input.size(0), 128, dtype=torch.float32)
        c_t2 = torch.zeros(input.size(0), 128, dtype=torch.float32)
        
        for input_t in input.split(1, dim=1):
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t)
            outputs += [output]
        for i in range(future):# if we should predict the future
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t)
            outputs += [output]
        outputs = torch.cat(outputs, dim=1)
        return outputs
xy = np.loadtxt('bk18dt.csv', delimiter=',', skiprows=1, usecols=(1,2,3,4,5,6,7),)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_Scaled = scaler.fit_transform(xy)
X = torch.from_numpy(X_Scaled).float().to(device)
for i, val in enumerate(X):
    X = X.to(device)
train_data = X[3:, :-1]
target_data = X[3:, 1:]
test_data = X[:3, :-1]
test_target = X[:3, 1:]
model = LSTM().to(device)

criterion = nn.MSELoss()
optimizer = optim.LBFGS(model.parameters(), lr = 0.03)

for i in range(128):
    print('STEP: ', i)
    def closure():
        optimizer.zero_grad()
        out = model(train_data).to(device)
        loss = criterion(out, target_data)
        print('loss:', loss.item())
        loss.backward()
        return loss
    optimizer.step(closure)
    # begin to predict, no need to track gradient here
    with torch.no_grad():
        future = 1000
        pred = model(test_data, future=future).to(device)
        loss = criterion(pred[:, :-future], test_target)
        print('test loss:', loss.item())
        yPred = pred.detach().numpy()

So this is my code. I want to train the LSTM on the GPU.

Details of my GPU:

nvidia-smi
Tue Mar 16 14:39:52 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.39       Driver Version: 460.39       CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  GeForce RTX 207...  Off  | 00000000:01:00.0  On |                  N/A |
| 39%   40C    P8    18W / 215W |   1996MiB /  7979MiB |      2%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|    0   N/A  N/A      1623      G   /usr/lib/xorg/Xorg                 59MiB |
|    0   N/A  N/A      2738      G   /usr/lib/xorg/Xorg                451MiB |
|    0   N/A  N/A      2884      G   /usr/bin/gnome-shell               42MiB |
|    0   N/A  N/A      3958      G   ...AAAAAAAAA= --shared-files      343MiB |
|    0   N/A  N/A      7184      G   /usr/lib/firefox/firefox            2MiB |
|    0   N/A  N/A     13860      C   ...ath/miniconda3/bin/python     1069MiB |
+-----------------------------------------------------------------------------+

It tells me that ‘out’ is on the CPU, even though I have pushed it to the GPU. Any help would be awesome!

Error:

RuntimeError: Tensor for 'out' is on CPU, Tensor for argument #1 'self' is on CPU, but expected them to be on GPU (while checking arguments for addmm)
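
For reference, a quick way to check which device the model and the data actually live on (using the variable names from the code above):

print(next(model.parameters()).device)  # device of the LSTM weights
print(train_data.device)                # device of the training inputs
print(target_data.device)               # device of the targets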

This is happening because you did not put the input data and the hidden and cell states onto the GPU. You need to put both of those on the GPU, and then it should work.
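
A minimal sketch of what I mean, using the names from the code above — create the states on the same device as the input inside forward, and make sure the data itself is on the GPU too:

# inside LSTM.forward: build the states on the same device as the incoming batch,
# so they match the LSTMCell weights after model.to(device)
h_t  = torch.zeros(input.size(0), self.n_hidden, device=input.device)
c_t  = torch.zeros(input.size(0), self.n_hidden, device=input.device)
h_t2 = torch.zeros(input.size(0), self.n_hidden, device=input.device)
c_t2 = torch.zeros(input.size(0), self.n_hidden, device=input.device)

# ...and outside the model, move the data as well
train_data = train_data.to(device)
target_data = target_data.to(device)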

I tried that too! But it’s still the same. And by input data you mean the training samples, correct? If that’s the case, I have added them to the GPU.

Yes, that is what I mean.

Does it give you the same error, or a different one?

Is this the correct way to push to the GPU?

For all the hidden states, right?

Yes, all the hidden and cell states.

It’s the same error.

I did it, but no luck.

Can you send your new code?

device = "cuda" if torch.cuda.is_available() else "cpu"
class LSTM(nn.Module):
    def __init__(self, n_hidden = 128,):
        super(LSTM, self).__init__()
        self.n_hidden = n_hidden
        #self.linearIn = nn.Linear(6, 128)
        self.lstm1 = nn.LSTMCell(1, self.n_hidden,)
        self.lstm2 = nn.LSTMCell(self.n_hidden, self.n_hidden)
        self.linear = nn.Linear(self.n_hidden, 1)
    
    def forward(self, input, future=0):
        outputs = []
        h_t = torch.zeros(input.size(0), 128, dtype=torch.float32).to(device)
        c_t = torch.zeros(input.size(0), 128, dtype=torch.float32).to(device)
        h_t2 = torch.zeros(input.size(0), 128, dtype=torch.float32).to(device)
        c_t2 = torch.zeros(input.size(0), 128, dtype=torch.float32).to(device)
        
        for input_t in input.split(1, dim=1):
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t).to(device)
            outputs += [output]
        for i in range(future):# if we should predict the future
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t)
            outputs += [output]
        outputs = torch.cat(outputs, dim=1)
        return outputs

New code; I added it to c_t as well.

Did you put the test data and train data on the GPU like this?

train_data = train_data.to(device)
test_data = test_data.to(device)

Can you send your train loop too?

Okay, I thought just doing X.to(device) was enough.

X = torch.from_numpy(X_Scaled).float().to(device)
for i, val in enumerate(X):
    X = X.to(device)

#before
train_data = X[3:, :-1]
target_data = X[3:, 1:]
test_data = X[:3, :-1]
test_target = X[:3, 1:]

#after

train_data = X[3:, :-1].to(device)
target_data = X[3:, 1:].to(device)
test_data = X[:3, :-1].to(device)
test_target = X[:3, 1:].to(device)

Oh no, sorry, I should have clarified that you have to redefine the variable. Did it work?
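
To be clear, Tensor.to() is not an in-place operation: it returns a new tensor, so the result has to be assigned back, whereas model.to(device) moves the module's parameters in place. A minimal illustration:

x = torch.zeros(3)
x.to(device)      # returns a moved copy that is discarded; x itself stays on the CPU
x = x.to(device)  # reassign, so x now actually lives on `device`

model = LSTM()
model.to(device)  # nn.Module.to() modifies the module in place, no reassignment needed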

Thanks mate! Yes it worked!
