Is there any solution other than increasing memory? Running inference on the whole test dataframe at once with the code below raises an MPS out-of-memory error:

model.eval()
with torch.no_grad():
    inputs = torch.tensor(test_df.iloc[:, :].values).to(device)
    test_Y = model(inputs.float())
    survived = torch.max(test_Y, dim=1)[1]
    test_paID = pd.read_csv('sample_submission.csv')['customer_ID']
    sub_df = pd.DataFrame({'customer_ID': test_paID.values, 'prediction': survived})
    print(sub_df)
    sub_df.to_csv('sample_submission.csv', index=False)

RuntimeError                              Traceback (most recent call last)
Cell In[9], line 4
      2 with torch.no_grad():
      3         inputs = torch.tensor(test_df.iloc[:,:].values).to(device)
----> 4         test_Y = model(inputs.float())
      5         survived = torch.max(test_Y, dim=1)[1]
      6         test_paID = pd.read_csv('sample_submission.csv')['customer_ID']

File ~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1488, in Module._call_impl(self, *args, **kwargs)
   1483 # If we don't have any hooks, we want to skip the rest of the logic in
   1484 # this function, and just call forward.
   1485 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1486         or _global_backward_pre_hooks or _global_backward_hooks
   1487         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1488     return forward_call(*args, **kwargs)
   1489 # Do not call functions when jit is used
   1490 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[6], line 30, in Net.forward(self, x)
     28 x = self.f7(x)
     29 x = self.relu(x)
---> 30 x = self.f8(x)
     31 x = self.relu(x)
     32 x = self.f9(x)

File ~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1488, in Module._call_impl(self, *args, **kwargs)
   1483 # If we don't have any hooks, we want to skip the rest of the logic in
   1484 # this function, and just call forward.
   1485 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1486         or _global_backward_pre_hooks or _global_backward_hooks
   1487         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1488     return forward_call(*args, **kwargs)
   1489 # Do not call functions when jit is used
   1490 full_backward_hooks, non_full_backward_hooks = [], []

File ~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/linear.py:114, in Linear.forward(self, input)
    113 def forward(self, input: Tensor) -> Tensor:
--> 114     return F.linear(input, self.weight, self.bias)

RuntimeError: MPS backend out of memory (MPS allocated: 4.80 GB, other allocations: 7.36 GB, max allowed: 18.13 GB). Tried to allocate 7.03 GB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

It looks like you are feeding the entire dataframe through the model in a single forward pass, so the activations for every row have to fit in MPS memory at the same time; that is why the model tries to allocate 7.03 GB in one go. If the dataframe is large, splitting inference into smaller batches should avoid the out-of-memory error without any hardware changes.
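
Here is a minimal sketch of what that could look like, reusing `model`, `device`, and `test_df` from your snippet. The `batch_size` value is an assumption; tune it down if memory pressure persists.

import torch
import pandas as pd

model.eval()
batch_size = 4096  # assumed value; lower it if you still run out of memory
preds = []
# Build the feature tensor once on the CPU; only one batch at a time
# is moved to the device.
features = torch.tensor(test_df.values, dtype=torch.float32)

with torch.no_grad():
    for start in range(0, features.shape[0], batch_size):
        # Move a single batch to the MPS device for the forward pass.
        batch = features[start:start + batch_size].to(device)
        out = model(batch)
        # Copy predictions back to the CPU so device memory can be reclaimed.
        preds.append(torch.max(out, dim=1)[1].cpu())

survived = torch.cat(preds).numpy()
test_paID = pd.read_csv('sample_submission.csv')['customer_ID']
sub_df = pd.DataFrame({'customer_ID': test_paID.values, 'prediction': survived})
sub_df.to_csv('sample_submission.csv', index=False)

Moving each batch of predictions back to the CPU before the next iteration lets the MPS allocator free the intermediate activations, and it also avoids a latent issue in your original snippet, where an MPS tensor is passed directly into pd.DataFrame. For larger pipelines, wrapping the tensor in torch.utils.data.TensorDataset and iterating with a DataLoader gives you the same batching with less manual indexing.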