Hi, I’m having some trouble which results in the following error:

```
pred_log_probs = estimator.forward(x_train[:, :])
File "D:\Google_Drive\Projects\FeedForward_detection\NN_detector_module_seperated_output.py", line 84, in forward
x = F.relu((self.layer_1(x)))
File "C:\Users\sholev\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\nn\functional.py", line 643, in relu
return torch.relu(input)
RuntimeError: CUDA error: out of memory
```

The strange thing is that this error arises after 7 epochs, so it seems like some GPU memory allocation is not being released.

The NN architecture is the following:

```
class symbols_detector(nn.Module):
    """Five-layer fully connected network producing per-position log-probabilities.

    The network takes ``(N1 * 2 + 2) * N2`` input features and returns a tensor
    of shape ``(batch, N2, num_of_symbols)`` whose last axis has been passed
    through ``LogSoftmax``.

    The hidden widths ``L1``..``L4`` are read from module-level names that are
    defined outside this class.

    Args:
        num_of_symbols: size of the last (log-softmax) axis of the output.
        N1: used only to compute the input width ``(N1 * 2 + 2) * N2``.
        N2: size of the middle axis of the output; also scales the input width.
    """

    def __init__(self, num_of_symbols, N1, N2):
        super().__init__()
        self.num_of_symbols = num_of_symbols
        self.N1 = N1
        self.N2 = N2
        # Width of the flattened input vector expected by layer_1.
        self.Lin = (N1 * 2 + 2) * N2
        self.layer_1 = nn.Linear(self.Lin, L1)
        self.layer_2 = nn.Linear(L1, L2)
        self.layer_3 = nn.Linear(L2, L3)
        self.layer_4 = nn.Linear(L3, L4)
        self.layer_5 = nn.Linear(L4, self.num_of_symbols * N2)
        # dim=2 is the num_of_symbols axis after the reshape in forward().
        self.log_p = nn.LogSoftmax(dim=2)

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, N2, num_of_symbols)``.

        Args:
            x: tensor whose last dimension is ``self.Lin``.
        """
        x = F.relu(self.layer_1(x))
        x = F.relu(self.layer_2(x))
        x = F.relu(self.layer_3(x))
        x = F.relu(self.layer_4(x))
        x = self.layer_5(x)
        # layer_5 emits num_of_symbols * N2 values per sample, so the middle
        # axis must be N2 (the original `self.2` was a syntax error).
        x = x.view(-1, self.N2, self.num_of_symbols)
        return self.log_p(x)
```

The training code is the following:

```
rand_idx = torch.randperm(x_train.shape[0])
x_train.pin_memory()
x_train = x_train[rand_idx, :].cuda()
y_train = y_train[:, rand_idx]
y_train = y_train.cuda()
pred_log_probs = estimator.forward(x_train[:, :])
train_loss = torch.zeros([ep_num+1])
SER_train = np.zeros([ep_num+1])
BER_train = np.zeros([ep_num + 1])
train_loss[0] = cost_func(pred_log_probs.permute([0, 2, 1]), y_train[0, :])
train_loss[0].cpu().data.numpy())
for i in range(0, ep_num):
estimator.train()
for j in range(0, (int(N_train_samples/batch_Size)-2)):
#print(j)
pred_log_probs = estimator.forward(x_train[j * batch_Size:(j + 1) * batch_Size, :])
model_optimizer.zero_grad()
loss1 = cost_func(pred_log_probs.permute([0, 2, 1]), y_train[0, j * batch_Size:(j + 1) * batch_Size])
loss1.backward()
model_optimizer.step()
estimator.eval()
pred_log_probs = estimator.forward(x_train[:, :])
train_loss[i+1] = cost_func(pred_log_probs.permute([0, 2, 1]), y_train[0, :])
rand_idx = torch.randperm(x_train.shape[0])
x_train = x_train[rand_idx, :].cuda()
y_train = y_train[:, rand_idx].cuda()
print('model trained data set')
return train_loss
```

Is there something I’m missing that causes the memory overflow?

Thanks