I am trying to create a hybrid recommender system using pytorch lightning. Here are my dataset and model classes:
import pytorch_lightning as pl
class MIMICDataset(pl.LightningDataModule):
def __init__(self, train_data, valid_data, test_data, all_codes):
super().__init__()
self.train_data = train_data
self.val_data = valid_data
self.test_data = test_data
self.all_codes = all_codes
def train_dataloader(self):
train_subjects, train_codes, train_symptoms, train_labels = [], [], [], []
train_set = set(zip(self.train_data['SUBJECT_ID'], self.train_data['ICD9_CODE'], self.train_data['SYMPTOMS']))
subject_code_set = set(zip(self.train_data['SUBJECT_ID'], self.train_data['ICD9_CODE']))
num_negatives = 4
for s, c in subject_code_set:
train_subjects.append(s)
train_codes.append(c)
train_symptoms.append(np.random.choice(self.train_data[self.train_data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()))
train_labels.append(1)
for _ in range(num_negatives):
negative_item = np.random.choice(self.all_codes)
while (s, negative_item) in subject_code_set:
negative_item = np.random.choice(self.all_codes)
train_subjects.append(s)
train_codes.append(negative_item)
train_symptoms.append(np.random.choice(self.train_data[self.train_data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()))
train_labels.append(0)
train_subject_tensor = torch.tensor(train_subjects, dtype=torch.long)
train_code_tensor = torch.tensor(train_codes, dtype=torch.long)
train_symptom_tensor = torch.tensor(train_symptoms, dtype=torch.long)
train_label_tensor = torch.tensor(train_labels, dtype=torch.float)
train_dataset = TensorDataset(train_subject_tensor, train_code_tensor, train_symptom_tensor, train_label_tensor)
return DataLoader(train_dataset, batch_size=512, num_workers=0)
def val_dataloader(self):
val_subjects, val_codes, val_symptoms, val_labels = [], [], [], []
val_set = set(zip(self.val_data['SUBJECT_ID'], self.val_data['ICD9_CODE'], self.val_data['SYMPTOMS']))
subject_code_set = set(zip(self.val_data['SUBJECT_ID'], self.val_data['ICD9_CODE']))
num_negatives = 4
for s, c in subject_code_set:
val_subjects.append(s)
val_codes.append(c)
val_symptoms.append(np.random.choice(self.val_data[self.val_data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()))
val_labels.append(1)
for _ in range(num_negatives):
negative_item = np.random.choice(self.all_codes)
while (s, negative_item) in subject_code_set:
negative_item = np.random.choice(self.all_codes)
val_subjects.append(s)
val_codes.append(negative_item)
val_symptoms.append(np.random.choice(self.val_data[self.val_data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()))
val_labels.append(0)
val_subject_tensor = torch.tensor(train_subjects, dtype=torch.long)
val_code_tensor = torch.tensor(train_codes, dtype=torch.long)
val_symptom_tensor = torch.tensor(train_symptoms, dtype=torch.long)
val_label_tensor = torch.tensor(train_labels, dtype=torch.float)
val_dataset = TensorDataset(val_subject_tensor, val_code_tensor, val_symptom_tensor, val_label_tensor)
return DataLoader(val_dataset, batch_size=512, num_workers=0)
def test_dataloader(self):
test_subjects, test_codes, test_symptoms, test_labels = [], [], [], []
test_set = set(zip(self.test_data['SUBJECT_ID'], self.test_data['ICD9_CODE'], self.test_data['SYMPTOMS']))
subject_code_set = set(zip(self.test_data['SUBJECT_ID'], self.test_data['ICD9_CODE']))
num_negatives = 4
for s, c in subject_code_set:
test_subjects.append(s)
test_codes.append(c)
test_symptoms.append(np.random.choice(self.test_data[self.test_data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()))
test_labels.append(1)
for _ in range(num_negatives):
negative_item = np.random.choice(self.all_codes)
while (s, negative_item) in subject_code_set:
negative_item = np.random.choice(self.all_codes)
test_subjects.append(s)
test_codes.append(negative_item)
test_symptoms.append(np.random.choice(self.test_data[self.test_data['SUBJECT_ID'] == s]['SYMPTOMS'].to_numpy()))
test_labels.append(0)
test_subject_tensor = torch.tensor(train_subjects, dtype=torch.long)
test_code_tensor = torch.tensor(train_codes, dtype=torch.long)
test_symptom_tensor = torch.tensor(train_symptoms, dtype=torch.long)
test_label_tensor = torch.tensor(train_labels, dtype=torch.float)
test_dataset = TensorDataset(test_subject_tensor, test_code_tensor, test_symptom_tensor, test_label_tensor)
return DataLoader(test_dataset, batch_size=512, num_workers=0)
and
class NCF(pl.LightningModule):
def __init__(self, num_subjects, num_codes, num_symptoms, all_codes):
super().__init__()
self.all_codes = all_codes
self.subject_embedding = nn.Embedding(num_subjects, 8)
self.code_embedding = nn.Embedding(num_codes, 8)
self.symptom_embedding = nn.Embedding(num_symptoms, 8)
self.fc1 = nn.Linear(in_features=24, out_features=64)
self.fc2 = nn.Linear(in_features=64, out_features=32)
self.output = nn.Linear(in_features=32, out_features=1)
def forward(self, subject_input, code_input, symptom_input):
# Pass through embedding layers
subject_embedded = self.subject_embedding(subject_input)
code_embedded = self.code_embedding(code_input)
symptom_embedded = self.symptom_embedding(symptom_input)
vector = torch.cat([subject_embedded, code_embedded, symptom_embedded], dim=-1)
# Pass through dense layers
vector = F.relu(self.fc1(vector))
vector = F.relu(self.fc2(vector))
# Output layer
pred = nn.Sigmoid()(self.output(vector))
return pred
def training_step(self, batch, batch_idx):
subject_input, code_input, symptom_input, labels = batch
predicted_labels = self(subject_input, code_input, symptom_input)
loss = nn.BCELoss()
loss = loss(predicted_labels, labels.view(-1, 1).float())
self.log('train_loss', loss)
self.log('train_acc', pl.metrics.Accuracy(predicted_labels, labels.view(-1, 1)))
self.log('train_macro_f1', f1_score(labels.view(-1, 1).float(), predicted_labels > 0.5, average='macro'))
self.log('train_micro_f1', f1_score(labels.view(-1, 1).float(), predicted_labels > 0.5, average='micro'))
self.log('train_auc', roc_auc_score(labels.view(-1, 1).float(), predicted_labels.detach().numpy()))
return loss
def validation_step(self, batch, batch_idx):
subject_input, code_input, symptom_input, labels = batch
predicted_labels = self(subject_input, code_input, symptom_input)
loss = nn.BCELoss()
loss = loss(predicted_labels, labels.view(-1, 1).float())
self.log('val_loss', loss)
self.log('val_acc', pl.metrics.Accuracy(predicted_labels, labels.view(-1, 1)))
self.log('val_macro_f1', f1_score(labels.view(-1, 1).float(), predicted_labels > 0.5, average='macro'))
self.log('val_micro_f1', f1_score(labels.view(-1, 1).float(), predicted_labels > 0.5, average='micro'))
self.log('val_auc', roc_auc_score(labels.view(-1, 1).float(), predicted_labels.detach().numpy()))
return loss
def test_step(self, batch, batch_idx):
subject_input, code_input, symptom_input, labels = batch
predicted_labels = self(subject_input, code_input, symptom_input)
loss = nn.BCELoss()
loss = loss(predicted_labels, labels.view(-1, 1).float())
self.log('test_loss', loss)
self.log('test_acc', pl.metrics.Accuracy(predicted_labels > 0.5, labels.view(-1, 1)))
self.log('test_macro_f1', f1_score(labels.view(-1, 1).float(), predicted_labels > 0.5, average='macro'))
self.log('test_micro_f1', f1_score(labels.view(-1, 1).float(), predicted_labels > 0.5, average='micro'))
self.log('test_auc', roc_auc_score(labels.view(-1, 1).float(), predicted_labels.detach().numpy()))
return loss
def configure_optimizers(self):
return torch.optim.Adam(self.parameters(), lr=1e-5)
When I fit the model, I get this error:
AttributeError Traceback (most recent call last)
<ipython-input-44-d47d1b932123> in <module>
----> 1 trainer.fit(model, dm)
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
508 self.call_hook('on_fit_start')
509
--> 510 results = self.accelerator_backend.train()
511 self.accelerator_backend.teardown()
512
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py in train(self)
55 def train(self):
56 self.trainer.setup_trainer(self.trainer.model)
---> 57 return self.train_or_test()
58
59 def teardown(self):
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py in train_or_test(self)
72 else:
73 self.trainer.train_loop.setup_training()
---> 74 results = self.trainer.train()
75 return results
76
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in train(self)
559 with self.profiler.profile("run_training_epoch"):
560 # run train epoch
--> 561 self.train_loop.run_training_epoch()
562
563 if self.max_steps and self.max_steps <= self.global_step:
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py in run_training_epoch(self)
566 # SAVE METRICS TO LOGGERS
567 # -----------------------------------------
--> 568 self.trainer.logger_connector.log_train_step_metrics(batch_output)
569
570 # -----------------------------------------
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py in log_train_step_metrics(self, batch_output)
594 if self.trainer.train_loop.should_accumulate() and self.trainer.train_loop.automatic_optimization:
595 return
--> 596 _, batch_log_metrics = self.cached_results.update_logger_connector()
597 # when metrics should be logged
598 if self.should_update_logs or self.trainer.fast_dev_run is True:
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in update_logger_connector(self)
370 batch_pbar_metrics = self.get_latest_batch_pbar_metrics()
371 logger_connector.add_progress_bar_metrics(batch_pbar_metrics)
--> 372 batch_log_metrics = self.get_latest_batch_log_metrics()
373
374 if is_train:
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in get_latest_batch_log_metrics(self)
410
411 def get_latest_batch_log_metrics(self) -> Dict:
--> 412 batch_log_metrics = self.run_batch_from_func_name("get_batch_log_metrics")
413 batch_log_metrics.update(self.legacy_batch_log_metrics)
414 return batch_log_metrics
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in run_batch_from_func_name(self, func_name)
406 def run_batch_from_func_name(self, func_name) -> Dict:
407 results = [getattr(hook_result, func_name) for hook_result in self._internals.values()]
--> 408 results = [func(include_forked_originals=False) for func in results]
409 return {k: v for d in sum(results, []) for k, v in d.items()} # List[List[dict]] -> dict
410
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in <listcomp>(.0)
406 def run_batch_from_func_name(self, func_name) -> Dict:
407 results = [getattr(hook_result, func_name) for hook_result in self._internals.values()]
--> 408 results = [func(include_forked_originals=False) for func in results]
409 return {k: v for d in sum(results, []) for k, v in d.items()} # List[List[dict]] -> dict
410
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in get_batch_log_metrics(self, *args, **kwargs)
120
121 def get_batch_log_metrics(self, *args, **kwargs):
--> 122 return self.run_latest_batch_metrics_with_func_name("get_batch_log_metrics", *args, **kwargs)
123
124 def run_epoch_func(self, results, opt_metric, func_name, *args, **kwargs) -> None:
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in run_latest_batch_metrics_with_func_name(self, func_name, *args, **kwargs)
111 and cache its pbar and log metrics if already called on,
112 """
--> 113 return [
114 self.get_latest_from_func_name(self._latest_ref[dl_idx], func_name, *args, **kwargs)
115 for dl_idx in range(self.num_dataloaders)
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in <listcomp>(.0)
112 """
113 return [
--> 114 self.get_latest_from_func_name(self._latest_ref[dl_idx], func_name, *args, **kwargs)
115 for dl_idx in range(self.num_dataloaders)
116 ]
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py in get_latest_from_func_name(self, latest_result_opt, func_name, *args, **kwargs)
98 add_dataloader_idx = self.check_dataloader_idx(latest_result)
99 func = getattr(latest_result, func_name)
--> 100 results.update(func(*args, add_dataloader_idx=add_dataloader_idx, **kwargs))
101 return results
102
~/anaconda3/lib/python3.8/site-packages/pytorch_lightning/core/step_result.py in get_batch_log_metrics(self, include_forked_originals, add_dataloader_idx)
296 if options['logger'] and options['on_step']:
297 if isinstance(self[k], Metric):
--> 298 result[dl_key] = self[k]._forward_cache.detach()
299 else:
300 result[dl_key] = self[k]
AttributeError: 'NoneType' object has no attribute 'detach'
Any help with resolving this would be appreciated.