I get a runtime error when predicting on the test set, and nothing I try fixes it. Here is the training loop:

import torch
from itertools import cycle
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import roc_auc_score

for epoch in range(150):
    # Re-split the training data into training/validation parts each epoch
    train, valid = random_split(train_nn, [850000, 50000])

    trainloader = DataLoader(train, batch_size=BATCH_SIZE)
    validloader = DataLoader(valid, batch_size=BATCH_SIZE)

    # Iterate the loader directly instead of calling next(iter(loader))
    # inside the loop, which restarts the loader and returns the same
    # first batch every time; cycle() keeps the smaller validation
    # loader from being exhausted before the training loader is.
    valid_iter = cycle(validloader)

    for train_data in trainloader:
        validation_data = next(valid_iter)

        # The last column holds the label, the rest are features
        train_x = train_data[:, :-1]
        train_y = train_data[:, -1]

        validation_x = validation_data[:, :-1]
        validation_y = validation_data[:, -1]

        # reshape(-1) instead of reshape((BATCH_SIZE,)) so the last,
        # possibly smaller, batch does not fail
        y_hat = model(train_x).reshape(-1)
        cost = LOSS(y_hat, train_y)

        cost.backward()
        OPTIMIZER.step()
        OPTIMIZER.zero_grad()

        with torch.no_grad():
            roc_auc_values_validation.append(roc_auc_score(
                validation_y.cpu(), model(validation_x).reshape(-1).cpu().numpy()))
            roc_auc_values_train.append(roc_auc_score(
                train_y.cpu(), model(train_x).reshape(-1).cpu().numpy()))

    torch.cuda.empty_cache()
    with torch.no_grad():
        model(test_nn).cpu().numpy()  # <-- this is the line that raises the error

    print(f"Epoch: {epoch+1}:", f"cost: {cost}")
    print(f"AUC under the ROC curve for the validation set: {roc_auc_values_validation[-1]}")
    print(f"AUC under the ROC curve for the training set: {roc_auc_values_train[-1]}")

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_20280/336595200.py in <module>
     32     torch.cuda.empty_cache()
     33     with torch.no_grad():
---> 34             model(test_nn).detach().cpu().numpy()
     35 
     36     print(f'Epoch: {epoch+1}:',f"cost: {cost}")

~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1108         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1109                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110             return forward_call(*input, **kwargs)
   1111         # Do not call functions when jit is used
   1112         full_backward_hooks, non_full_backward_hooks = [], []

~\AppData\Local\Temp/ipykernel_20280/317492893.py in forward(self, x)
     15 
     16     def forward(self, x):
---> 17         x = F.relu(self.input_layer(x))
     18 
     19         x = F.relu(self.hidden_layer1(x))

~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1108         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1109                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110             return forward_call(*input, **kwargs)
   1111         # Do not call functions when jit is used
   1112         full_backward_hooks, non_full_backward_hooks = [], []

~\anaconda3\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
    101 
    102     def forward(self, input: Tensor) -> Tensor:
--> 103         return F.linear(input, self.weight, self.bias)
    104 
    105     def extra_repr(self) -> str:

RuntimeError: CUDA out of memory. Tried to allocate 2.67 GiB (GPU 0; 6.00 GiB total capacity; 3.26 GiB already allocated; 1.16 GiB free; 3.27 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

What is the BATCH_SIZE here? I would consider reducing it if that’s possible. Note, though, that the failing line `model(test_nn)` pushes the entire test set through the model in a single forward pass, so it allocates the same 2.67 GiB no matter how small BATCH_SIZE is; the test-set prediction needs to be batched as well.
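
For that failing line, a minimal sketch of chunked inference, assuming `test_nn` is a single tensor on the same device as the model (the chunk size of 4096 is an arbitrary value to tune):

preds = []
CHUNK = 4096  # arbitrary; shrink it if memory is still tight

with torch.no_grad():
    for start in range(0, len(test_nn), CHUNK):
        # Forward one slice at a time and move the result off the GPU
        # immediately so that chunk's activations can be freed.
        preds.append(model(test_nn[start:start + CHUNK]).cpu())

test_predictions = torch.cat(preds).numpy()  # one array for the full test set

Calling .cpu() inside the loop matters: only one chunk's outputs and activations live on the GPU at a time instead of the whole test set's.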