Hi all,

I´m new to PyTorch, and I’m trying to train (on a GPU) a simple BiLSTM for a regression task. I have 65 features and the shape of my training set is (1969875, 65). The specific architecture of my model is:

```
LSTM(
(lstm2): LSTM(65, 260, num_layers=3, bidirectional=True)
(linear): Linear(in_features=520, out_features=1, bias=True)
)
```

I’m using batch size of 64.

The GPU is a “NVIDIA Tesla P100 16GB”

The error I get is:

```
RuntimeError: CUDA out of memory. Tried to allocate 7.63 GiB (GPU 0; 15.90 GiB total capacity; 12.06 GiB already allocated; 3.16 GiB free; 12.08 GiB reserved in total by PyTorch)
srun: error: gpu018: task 0: Exited with exit code 1
```

I’m starting to think that I’m doing something wrong in my code since I think 16GB should be more than enough for this amount of data and this model.

I share here my code, maybe someone can tell me if I’m doing something wrong or missing something:

```
num_features = 65
HIDDEN_SIZE = num_features * 4
BATCH_SIZE = 64
OUTPUT_DIM = 1
NUM_LAYERS = 2
LEARNING_RATE = 0.0005
NUM_EPOCHS = 500
SEED = 42
# Set seeds for python, numpy and torch
np.random.seed(SEED)
torch.manual_seed(SEED)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
class LSTM(nn.Module):
def __init__(self, input_dim, hidden_dim, batch_size, output_dim, num_layers):
super(LSTM, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.num_layers = num_layers
self.lstm2 = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers, bidirectional=True)
self.linear = nn.Linear(self.hidden_dim*2, output_dim)
def forward(self, input):
lstm_out2, self.hidden = self.lstm2(input)
y_pred = self.linear(lstm_out2)
return y_pred
if __name__ == '__main__':
if torch.cuda.is_available():
dtype = torch.cuda.FloatTensor
else:
dtype = torch.float
file = 'D:\\QU-Lab\\coding\\PyTorch\\LSTM\\featuresNorm_MFCC_Extended20.csv'
file = 'Z:\\opt\\Noise_Level\\mfcc_data\\featuresNorm_MFCC_Extended20.csv'
file = '../mfcc_data/featuresNorm_MFCC_Extended20.csv'
features = load_data("file.csv")
X = np.hstack((features.iloc[:, 1:66].values, features['FILE'].values.reshape(len(features), 1)))
y = features['LABEL_LEVEL'].values
# Split the data into training and testing
X_train = torch.from_numpy(X_train[:, 0:65].astype(np.float32)).type(dtype)
X_train = X_train.unsqueeze(0)
y_train = torch.from_numpy(y_train.astype(np.float32)).type(dtype)
X_test = torch.from_numpy(X_test[:, 0:65].astype(np.float32)).type(dtype)
X_test = X_test.unsqueeze(0)
y_test = torch.from_numpy(y_test.astype(np.float32)).type(dtype)
lstm_model = LSTM(num_features, HIDDEN_SIZE, batch_size=BATCH_SIZE, output_dim=OUTPUT_DIM, num_layers=NUM_LAYERS)
# The same but with variables values
# lstm_model = LSTM(65, 260, 64, 1, 2)
lstm_model.to(device)
loss_function = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=LEARNING_RATE)
hist = np.zeros(NUM_EPOCHS)
for epoch in range(NUM_EPOCHS):
lstm_model.zero_grad()
y_pred = lstm_model(X_train)
y_pred = y_pred.squeeze()
loss = loss_function(y_pred, y_train)
if epoch % 20 == 0:
print("Epoch ", epoch, "MSE: ", loss.item())
hist[epoch] = loss.item()
optimizer.zero_grad()
loss.backward()
optimizer.step()
```

Thank you very much in advance for your help!