AttributeError: 'NoneType' object has no attribute 'detach'

I am trying to build a hybrid recommender system with PyTorch Lightning. Here are my data module and model classes:

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import f1_score, roc_auc_score
import pytorch_lightning as pl

class MIMICDataset(pl.LightningDataModule):
    def __init__(self, train_data, valid_data, test_data, all_codes):
        super().__init__()
        self.train_data = train_data
        self.val_data = valid_data
        self.test_data = test_data
        self.all_codes = all_codes

    def train_dataloader(self):
        train_subjects, train_codes, train_symptoms, train_labels = [], [], [], []
        subject_code_set = set(zip(self.train_data['SUBJECT_ID'], self.train_data['ICD9_CODE']))
        num_negatives = 4
        for s, c in subject_code_set:
            # One positive example per observed (subject, code) pair
            train_subjects.append(s)
            train_codes.append(c)
            train_symptoms.append(np.random.choice(self.train_data[self.train_data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()))
            train_labels.append(1)

            # Sample codes this subject was never assigned as negatives
            for _ in range(num_negatives):
                negative_item = np.random.choice(self.all_codes)
                while (s, negative_item) in subject_code_set:
                    negative_item = np.random.choice(self.all_codes)
                train_subjects.append(s)
                train_codes.append(negative_item)
                train_symptoms.append(np.random.choice(self.train_data[self.train_data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()))
                train_labels.append(0)
        
        train_subject_tensor = torch.tensor(train_subjects, dtype=torch.long)
        train_code_tensor = torch.tensor(train_codes, dtype=torch.long)
        train_symptom_tensor = torch.tensor(train_symptoms, dtype=torch.long)
        train_label_tensor = torch.tensor(train_labels, dtype=torch.float)
        
        train_dataset = TensorDataset(train_subject_tensor, train_code_tensor, train_symptom_tensor, train_label_tensor)
        return DataLoader(train_dataset, batch_size=512, num_workers=0)
    
    def val_dataloader(self):
        val_subjects, val_codes, val_symptoms, val_labels = [], [], [], []
        subject_code_set = set(zip(self.val_data['SUBJECT_ID'], self.val_data['ICD9_CODE']))
        num_negatives = 4
        for s, c in subject_code_set:
            val_subjects.append(s)
            val_codes.append(c)
            val_symptoms.append(np.random.choice(self.val_data[self.val_data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()))
            val_labels.append(1)

            for _ in range(num_negatives):
                negative_item = np.random.choice(self.all_codes)
                while (s, negative_item) in subject_code_set:
                    negative_item = np.random.choice(self.all_codes)
                val_subjects.append(s)
                val_codes.append(negative_item)
                val_symptoms.append(np.random.choice(self.val_data[self.val_data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()))
                val_labels.append(0)
        
        val_subject_tensor = torch.tensor(val_subjects, dtype=torch.long)
        val_code_tensor = torch.tensor(val_codes, dtype=torch.long)
        val_symptom_tensor = torch.tensor(val_symptoms, dtype=torch.long)
        val_label_tensor = torch.tensor(val_labels, dtype=torch.float)

        val_dataset = TensorDataset(val_subject_tensor, val_code_tensor, val_symptom_tensor, val_label_tensor)
        return DataLoader(val_dataset, batch_size=512, num_workers=0)
    
    def test_dataloader(self):
        test_subjects, test_codes, test_symptoms, test_labels = [], [], [], []
        subject_code_set = set(zip(self.test_data['SUBJECT_ID'], self.test_data['ICD9_CODE']))
        num_negatives = 4
        for s, c in subject_code_set:
            test_subjects.append(s)
            test_codes.append(c)
            test_symptoms.append(np.random.choice(self.test_data[self.test_data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()))
            test_labels.append(1)

            for _ in range(num_negatives):
                negative_item = np.random.choice(self.all_codes)
                while (s, negative_item) in subject_code_set:
                    negative_item = np.random.choice(self.all_codes)
                test_subjects.append(s)
                test_codes.append(negative_item)
                test_symptoms.append(np.random.choice(self.test_data[self.test_data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()))
                test_labels.append(0)
        
        test_subject_tensor = torch.tensor(test_subjects, dtype=torch.long)
        test_code_tensor = torch.tensor(test_codes, dtype=torch.long)
        test_symptom_tensor = torch.tensor(test_symptoms, dtype=torch.long)
        test_label_tensor = torch.tensor(test_labels, dtype=torch.float)

        test_dataset = TensorDataset(test_subject_tensor, test_code_tensor, test_symptom_tensor, test_label_tensor)
        return DataLoader(test_dataset, batch_size=512, num_workers=0)
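
(I know the three dataloader methods above duplicate the same sampling logic; factored out, the shared part would look roughly like the sketch below, where _build_dataloader is just an illustrative name and each of the three methods would reduce to a one-line call such as return self._build_dataloader(self.train_data).)

    def _build_dataloader(self, data):
        # Shared positive/negative sampling used by all three splits
        subjects, codes, symptoms, labels = [], [], [], []
        subject_code_set = set(zip(data['SUBJECT_ID'], data['ICD9_CODE']))
        num_negatives = 4
        for s, c in subject_code_set:
            subject_symptoms = data[data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()
            # One positive example for the observed (subject, code) pair
            subjects.append(s)
            codes.append(c)
            symptoms.append(np.random.choice(subject_symptoms))
            labels.append(1)
            # num_negatives codes never assigned to this subject
            for _ in range(num_negatives):
                negative_item = np.random.choice(self.all_codes)
                while (s, negative_item) in subject_code_set:
                    negative_item = np.random.choice(self.all_codes)
                subjects.append(s)
                codes.append(negative_item)
                symptoms.append(np.random.choice(subject_symptoms))
                labels.append(0)
        dataset = TensorDataset(
            torch.tensor(subjects, dtype=torch.long),
            torch.tensor(codes, dtype=torch.long),
            torch.tensor(symptoms, dtype=torch.long),
            torch.tensor(labels, dtype=torch.float),
        )
        return DataLoader(dataset, batch_size=512, num_workers=0)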

And here is the model:

class NCF(pl.LightningModule):
    def __init__(self, num_subjects, num_codes, num_symptoms, all_codes):
        super().__init__()
        self.all_codes = all_codes

        # 8-dimensional embeddings for subjects, ICD-9 codes, and symptoms
        self.subject_embedding = nn.Embedding(num_subjects, 8)
        self.code_embedding = nn.Embedding(num_codes, 8)
        self.symptom_embedding = nn.Embedding(num_symptoms, 8)

        self.fc1 = nn.Linear(in_features=24, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=32)
        self.output = nn.Linear(in_features=32, out_features=1)

    def forward(self, subject_input, code_input, symptom_input):
        # Pass through embedding layers
        subject_embedded = self.subject_embedding(subject_input)
        code_embedded = self.code_embedding(code_input)
        symptom_embedded = self.symptom_embedding(symptom_input)
        vector = torch.cat([subject_embedded, code_embedded, symptom_embedded], dim=-1)

        # Pass through dense layers
        vector = F.relu(self.fc1(vector))
        vector = F.relu(self.fc2(vector))

        # Output layer
        pred = torch.sigmoid(self.output(vector))
        return pred
    
    def training_step(self, batch, batch_idx):
        subject_input, code_input, symptom_input, labels = batch
        predicted_labels = self(subject_input, code_input, symptom_input) 
        criterion = nn.BCELoss()
        loss = criterion(predicted_labels, labels.view(-1, 1).float())

        self.log('train_loss', loss)
        self.log('train_acc', pl.metrics.Accuracy(predicted_labels, labels.view(-1, 1)))
        self.log('train_macro_f1', f1_score(labels.view(-1, 1).float(), predicted_labels > 0.5, average='macro'))
        self.log('train_micro_f1', f1_score(labels.view(-1, 1).float(), predicted_labels > 0.5, average='micro'))
        self.log('train_auc', roc_auc_score(labels.view(-1, 1).float(), predicted_labels.detach().numpy()))
        return loss

    def validation_step(self, batch, batch_idx):
        subject_input, code_input, symptom_input, labels = batch
        predicted_labels = self(subject_input, code_input, symptom_input) 
        criterion = nn.BCELoss()
        loss = criterion(predicted_labels, labels.view(-1, 1).float())

        self.log('val_loss', loss)
        self.log('val_acc', pl.metrics.Accuracy(predicted_labels, labels.view(-1, 1)))
        self.log('val_macro_f1', f1_score(labels.view(-1, 1).float(), predicted_labels > 0.5, average='macro'))
        self.log('val_micro_f1', f1_score(labels.view(-1, 1).float(), predicted_labels > 0.5, average='micro'))
        self.log('val_auc', roc_auc_score(labels.view(-1, 1).float(), predicted_labels.detach().numpy()))
        return loss
    
    def test_step(self, batch, batch_idx):
        subject_input, code_input, symptom_input, labels = batch
        predicted_labels = self(subject_input, code_input, symptom_input) 
        criterion = nn.BCELoss()
        loss = criterion(predicted_labels, labels.view(-1, 1).float())

        self.log('test_loss', loss)
        self.log('test_acc', pl.metrics.Accuracy(predicted_labels > 0.5, labels.view(-1, 1)))
        self.log('test_macro_f1', f1_score(labels.view(-1, 1).float(), predicted_labels > 0.5, average='macro'))
        self.log('test_micro_f1', f1_score(labels.view(-1, 1).float(), predicted_labels > 0.5, average='micro'))
        self.log('test_auc', roc_auc_score(labels.view(-1, 1).float(), predicted_labels.detach().numpy()))
        return loss
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-5)
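
For completeness, I build the data module, model, and trainer roughly like this (a sketch; the NUM_* values are placeholders for the actual entity counts from my dataframes, and the trainer arguments are not the exact ones I used):

dm = MIMICDataset(train_data, valid_data, test_data, all_codes)
model = NCF(num_subjects=NUM_SUBJECTS, num_codes=NUM_CODES,
            num_symptoms=NUM_SYMPTOMS, all_codes=all_codes)
trainer = pl.Trainer(max_epochs=5)  # placeholder trainer args
trainer.fit(model, dm)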

When I fit the model, I get this error:

AttributeError                            Traceback (most recent call last)
<ipython-input-44-d47d1b932123> in <module>
----> 1 trainer.fit(model, dm)

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
    508         self.call_hook('on_fit_start')
    509 
--> 510         results = self.accelerator_backend.train()
    511         self.accelerator_backend.teardown()
    512 

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py in train(self)
     55     def train(self):
     56         self.trainer.setup_trainer(self.trainer.model)
---> 57         return self.train_or_test()
     58 
     59     def teardown(self):

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py in train_or_test(self)
     72         else:
     73             self.trainer.train_loop.setup_training()
---> 74             results = self.trainer.train()
     75         return results
     76 

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in train(self)
    559                 with self.profiler.profile("run_training_epoch"):
    560                     # run train epoch
--> 561                     self.train_loop.run_training_epoch()
    562 
    563                 if self.max_steps and self.max_steps <= self.global_step:

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py in run_training_epoch(self)
    566             # SAVE METRICS TO LOGGERS
    567             # -----------------------------------------
--> 568             self.trainer.logger_connector.log_train_step_metrics(batch_output)
    569 
    570             # -----------------------------------------

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py in log_train_step_metrics(self, batch_output)
    594         if self.trainer.train_loop.should_accumulate() and self.trainer.train_loop.automatic_optimization:
    595             return
--> 596         _, batch_log_metrics = self.cached_results.update_logger_connector()
    597         # when metrics should be logged
    598         if self.should_update_logs or self.trainer.fast_dev_run is True:

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in update_logger_connector(self)
    370             batch_pbar_metrics = self.get_latest_batch_pbar_metrics()
    371             logger_connector.add_progress_bar_metrics(batch_pbar_metrics)
--> 372             batch_log_metrics = self.get_latest_batch_log_metrics()
    373 
    374             if is_train:

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in get_latest_batch_log_metrics(self)
    410 
    411     def get_latest_batch_log_metrics(self) -> Dict:
--> 412         batch_log_metrics = self.run_batch_from_func_name("get_batch_log_metrics")
    413         batch_log_metrics.update(self.legacy_batch_log_metrics)
    414         return batch_log_metrics

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in run_batch_from_func_name(self, func_name)
    406     def run_batch_from_func_name(self, func_name) -> Dict:
    407         results = [getattr(hook_result, func_name) for hook_result in self._internals.values()]
--> 408         results = [func(include_forked_originals=False) for func in results]
    409         return {k: v for d in sum(results, []) for k, v in d.items()}  # List[List[dict]] -> dict
    410 

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in <listcomp>(.0)
    406     def run_batch_from_func_name(self, func_name) -> Dict:
    407         results = [getattr(hook_result, func_name) for hook_result in self._internals.values()]
--> 408         results = [func(include_forked_originals=False) for func in results]
    409         return {k: v for d in sum(results, []) for k, v in d.items()}  # List[List[dict]] -> dict
    410 

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in get_batch_log_metrics(self, *args, **kwargs)
    120 
    121     def get_batch_log_metrics(self, *args, **kwargs):
--> 122         return self.run_latest_batch_metrics_with_func_name("get_batch_log_metrics", *args, **kwargs)
    123 
    124     def run_epoch_func(self, results, opt_metric, func_name, *args, **kwargs) -> None:

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in run_latest_batch_metrics_with_func_name(self, func_name, *args, **kwargs)
    111         and cache its pbar and log metrics if already called on,
    112         """
--> 113         return [
    114             self.get_latest_from_func_name(self._latest_ref[dl_idx], func_name, *args, **kwargs)
    115             for dl_idx in range(self.num_dataloaders)

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in <listcomp>(.0)
    112         """
    113         return [
--> 114             self.get_latest_from_func_name(self._latest_ref[dl_idx], func_name, *args, **kwargs)
    115             for dl_idx in range(self.num_dataloaders)
    116         ]

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in get_latest_from_func_name(self, latest_result_opt, func_name, *args, **kwargs)
     98             add_dataloader_idx = self.check_dataloader_idx(latest_result)
     99             func = getattr(latest_result, func_name)
--> 100             results.update(func(*args, add_dataloader_idx=add_dataloader_idx, **kwargs))
    101         return results
    102 

~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/step_result.py in get_batch_log_metrics(self, include_forked_originals, add_dataloader_idx)
    296             if options['logger'] and options['on_step']:
    297                 if isinstance(self[k], Metric):
--> 298                     result[dl_key] = self[k]._forward_cache.detach()
    299                 else:
    300                     result[dl_key] = self[k]

AttributeError: 'NoneType' object has no attribute 'detach'

Any help with resolving this would be appreciated.

The error seems to be raised inside Lightning's logging internals (step_result.py), not directly by my code, so I'm not sure which of my self.log calls triggers it.
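
One thing I'm unsure about is how I log accuracy: I pass the predictions and labels straight to the pl.metrics.Accuracy constructor and log the resulting Metric object. My reading of the docs is that the metric should be instantiated once and then called on (preds, target). A sketch of that usage, assuming the pl.metrics API in Lightning 1.1 (I haven't verified that this resolves my error):

# In NCF.__init__ (sketch): create the metric object once
self.train_acc = pl.metrics.Accuracy()

# In training_step (sketch): calling the metric computes the batch
# accuracy and returns a tensor that is safe to pass to self.log
acc = self.train_acc(predicted_labels, labels.view(-1, 1).int())
self.log('train_acc', acc)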
CC @williamFalcon