Hi,
I am running into the following error when running:
> import os
> os.chdir("/Users/Wu/Desktop/Research/DL_train/GradCam_classific/DL_train")
>
>
> import argparse
> from pathlib import Path
>
> import torch
> import numpy as np
> from pytorch_lightning.callbacks import ModelCheckpoint
> from pytorch_lightning.loggers import TensorBoardLogger
> from sklearn.model_selection import train_test_split
> from torch.utils.data import DataLoader
>
> from dataloader import MRTDataset, get_patients
> from networks.attention_unet import AttentionUNet
> import pytorch_lightning as pl
> import matplotlib.pyplot as plt
>
> from polyaxon_client.tracking import Experiment
>
> from densenet import DenseNet
> from test_densenet import test_densenet
> parser = argparse.ArgumentParser()
>
> parser.add_argument('--in_channels', type=int, default=1)
> parser.add_argument('--out_channels', type=int, default=1)
> parser.add_argument('--out_classes', type=int, default=3)
>
> parser.add_argument('--learning_rate', type=float, default=1e-3)
> parser.add_argument('--weight_decay', type=str, default=1e-3)
>
> parser.add_argument('--train_batch_size', type=int, default=8)
> parser.add_argument('--test_batch_size', type=int, default=8)
>
> parser.add_argument('--num_epochs', type=int, default=32)
> parser.add_argument('--num_workers', type=int, default=4)
>
> parser.add_argument('--validation_size', type=float, default=0.2)
> parser.add_argument('--test_size', type=float, default=0.2)
>
> parser.add_argument('--split_seed', type=int, default=42)
>
>
> def compute_mean_and_std(train_loader, key): # mean and std for mri intensity
> mean = 0.
> std = 0.
> min = 1000
> max = 0
> for data in train_loader:
> images = data[key]
> mean += images.mean(dim=[2, 3, 4]).sum(0)
> std += images.std(dim=[2, 3, 4]).sum(0)
> if data['supervised'].sum()==0:
> continue
> if min > images.min():
> min = images.min()
> if max < images.max():
> max = images.max()
>
> mean /= len(train_loader.dataset)
> std /= len(train_loader.dataset)
> print(min)
> print(max)
> return mean, std
>
> if __name__ == '__main__':
> hparams = parser.parse_args()
> #experiment = Experiment()
> data_path = '//Volumes/Extreme Pro/DL_train/'
> csv_path = '/pain.csv'
> hparams.out_classes = 2
> hparams.learning_rate = 1e-3
> hparams.weight_decay = 1e-3
> hparams.train_batch_size = 4
> hparams.test_batch_size = 4
> hparams.num_epochs = 32
> hparams.split_seed = 42
> output_path = '/Volumes/Extreme Pro/DL_train/'
>
>
> sizes, spacings = [112, 128, 112], [1.5, 1.5, 1.5]
> strides, kernels = [], []
>
> csv_path = '/Volumes/Extreme Pro/DL_train/pain.csv'
>
> patients, patient_idx, labels = get_patients(data_path, csv_path)
> patient_idx, labels = torch.tensor(patient_idx), torch.tensor(labels)
> print("Splitting the data...")
> train_idx, test_idx, train_patients, test_patients, train_labels, test_labels = train_test_split(
> patient_idx,
> patients,
> labels,
> test_size=hparams.test_size,
> random_state=hparams.split_seed,
> stratify=labels,
> shuffle=True)
>
> train_idx, valid_idx, train_patients, valid_patients, train_labels, valid_labels = train_test_split(
> train_idx,
> train_patients,
> train_labels,
> test_size=hparams.validation_size,
> random_state=hparams.split_seed,
> stratify=train_labels,
> shuffle=True)
> print("Train: ")
> print("Number of patients: ", len(train_patients))
> train_set = MRTDataset(data_path, csv_path, train_patients)#, transforms=train_transform)
> print("Validation: ")
> print("Number of patients: ", len(valid_patients))
> valid_set = MRTDataset(data_path, csv_path, valid_patients)
> print("Test: ")
> print("Number of patients: ", len(test_patients))
> test_set = MRTDataset(data_path, csv_path, test_patients)
> train_loader = DataLoader(train_set, batch_size=4, num_workers=8, shuffle=True)
> valid_loader = DataLoader(valid_set, batch_size=1, num_workers=8)
> test_loader = DataLoader(test_set, batch_size=1, num_workers=0)
> print("Out Channels: ", hparams.out_channels)
>
> densenet = DenseNet(learning_rate=hparams.learning_rate)
>
> trainer = pl.Trainer(max_epochs=hparams.num_epochs, progress_bar_refresh_rate=200)
>
>
> trainer.fit(densenet, train_dataloader=train_loader, val_dataloaders=[valid_loader])
when executing the trainer.fit the following error is thrown:
11.2 M Trainable params
0 Non-trainable params
11.2 M Total params
Validation sanity check: 0it [00:00, ?it/s]
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
ModuleNotFoundError: No module named 'dataloader'
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
ModuleNotFoundError: No module named 'dataloader'
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
ModuleNotFoundError: No module named 'dataloader'
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
ModuleNotFoundError: No module named 'dataloader'
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
ModuleNotFoundError: No module named 'dataloader'
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
ModuleNotFoundError: No module named 'dataloader'
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
ModuleNotFoundError: No module named 'dataloader'
Traceback (most recent call last):
File "/Users/Wu/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 990, in _try_get_data
data = self._data_queue.get(timeout=timeout)
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/queues.py", line 113, in get
if not self._poll(timeout):
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/connection.py", line 262, in poll
return self._poll(timeout)
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/connection.py", line 429, in _poll
r = wait([self], timeout)
File "/Users/Wu/opt/anaconda3/lib/python3.9/multiprocessing/connection.py", line 936, in wait
ready = selector.select(timeout)
File "/Users/Wu/opt/anaconda3/lib/python3.9/selectors.py", line 416, in select
fd_event_list = self._selector.poll(timeout)
File "/Users/Wu/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/signal_handling.py", line 66, in handler
_error_if_any_worker_fails()
RuntimeError: DataLoader worker (pid 66043) exited unexpectedly with exit code 1. Details are lost due to multiprocessing. Rerunning with num_workers=0 may give better error trace.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/var/folders/79/z7g43_0x08g2yj7w6lb_j5280000gn/T/ipykernel_62506/90799514.py", line 1, in <module>
trainer.fit(densenet, train_dataloader=train_loader, val_dataloaders=[valid_loader])
File "/Users/Wu/opt/anaconda3/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 470, in fit
results = self.accelerator_backend.train()
File "/Users/Wu/opt/anaconda3/lib/python3.9/site-packages/pytorch_lightning/accelerators/cpu_accelerator.py", line 62, in train
results = self.train_or_test()
File "/Users/Wu/opt/anaconda3/lib/python3.9/site-packages/pytorch_lightning/accelerators/accelerator.py", line 69, in train_or_test
results = self.trainer.train()
File "/Users/Wu/opt/anaconda3/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 492, in train
self.run_sanity_check(self.get_model())
File "/Users/Wu/opt/anaconda3/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 690, in run_sanity_check
_, eval_results = self.run_evaluation(test_mode=False, max_batches=self.num_sanity_val_batches)
File "/Users/Wu/opt/anaconda3/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 593, in run_evaluation
for batch_idx, batch in enumerate(dataloader):
File "/Users/Wu/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 521, in __next__
data = self._next_data()
File "/Users/Wu/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1186, in _next_data
idx, data = self._get_data()
File "/Users/Wu/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1152, in _get_data
success, data = self._try_get_data()
File "/Users/Wu/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1003, in _try_get_data
raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
RuntimeError: DataLoader worker (pid(s) 66043) exited unexpectedly