```
device = "cuda" if torch.cuda.is_available() else "cpu"
class LSTM(nn.Module):
def __init__(self, n_hidden = 128,):
super(LSTM, self).__init__()
self.n_hidden = n_hidden
#self.linearIn = nn.Linear(6, 128)
self.lstm1 = nn.LSTMCell(1, self.n_hidden,)
self.lstm2 = nn.LSTMCell(self.n_hidden, self.n_hidden)
self.linear = nn.Linear(self.n_hidden, 1)
def forward(self, input, future=0):
outputs = []
h_t = torch.zeros(input.size(0), 128, dtype=torch.float32)
c_t = torch.zeros(input.size(0), 128, dtype=torch.float32)
h_t2 = torch.zeros(input.size(0), 128, dtype=torch.float32)
c_t2 = torch.zeros(input.size(0), 128, dtype=torch.float32)
for input_t in input.split(1, dim=1):
h_t, c_t = self.lstm1(input_t, (h_t, c_t))
h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
output = self.linear(h_t)
outputs += [output]
for i in range(future):# if we should predict the future
h_t, c_t = self.lstm1(output, (h_t, c_t))
h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
output = self.linear(h_t)
outputs += [output]
outputs = torch.cat(outputs, dim=1)
return outputs
```

```
xy = np.loadtxt('bk18dt.csv', delimiter=',', skiprows=1, usecols=(1,2,3,4,5,6,7),)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_Scaled = scaler.fit_transform(xy)
X = torch.from_numpy(X_Scaled).float().to(device)
```

```
for i, val in enumerate(X):
X = X.to(device)
```

```
train_data = X[3:, :-1]
target_data = X[3:, 1:]
test_data = X[:3, :-1]
test_target = X[:3, 1:]
```

```
model = LSTM().to(device)
criterion = nn.MSELoss()
optimizer = optim.LBFGS(model.parameters(), lr = 0.03)
```

```
for i in range(128):
print('STEP: ', i)
def closure():
optimizer.zero_grad()
out = model(train_data).to(device)
loss = criterion(out, target_data)
print('loss:', loss.item())
loss.backward()
return loss
optimizer.step(closure)
# begin to predict, no need to track gradient here
with torch.no_grad():
future = 1000
pred = model(test_data, future=future).to(device)
loss = criterion(pred[:, :-future], test_target)
print('test loss:', loss.item())
yPred = pred.detach().numpy()
```

So this is my code. I want to train the LSTM on the gpu

Details of my GPU

```
nvidia-smi
Tue Mar 16 14:39:52 2021
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.39 Driver Version: 460.39 CUDA Version: 11.2 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 GeForce RTX 207... Off | 00000000:01:00.0 On | N/A |
| 39% 40C P8 18W / 215W | 1996MiB / 7979MiB | 2% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 1623 G /usr/lib/xorg/Xorg 59MiB |
| 0 N/A N/A 2738 G /usr/lib/xorg/Xorg 451MiB |
| 0 N/A N/A 2884 G /usr/bin/gnome-shell 42MiB |
| 0 N/A N/A 3958 G ...AAAAAAAAA= --shared-files 343MiB |
| 0 N/A N/A 7184 G /usr/lib/firefox/firefox 2MiB |
| 0 N/A N/A 13860 C ...ath/miniconda3/bin/python 1069MiB |
+-----------------------------------------------------------------------------+
```

It tells me the ‘out’ is in cpu, when I have pushed it to the GPU. Any help would be awesome!

Error

```
RuntimeError: Tensor for 'out' is on CPU, Tensor for argument #1 'self' is on CPU, but expected them to be on GPU (while checking arguments for addmm)
```