Not able to run MONAI Auto3DSeg on TCIA dataset

LucianoDeben · March 11, 2024, 8:18am

Hi,

I am trying to implement the Auto3DSeg AutoRunner pipeline on a TCIA liver cancer dataset here. I successfully loaded the dataset using the MONAI TciaDataset API call and made a datalist according to the MCD tutorial here. Problems arise when trying to run the AutoRunner locally on a few test images:

# Import libraries
from monai.data import DataLoader
from monai.transforms import (EnsureChannelFirstd,
Compose, LoadImaged, ResampleToMatchd)

from monai.apps import TciaDataset
from monai.apps.auto3dseg import AutoRunner
from monai.bundle import ConfigParser

from monai.config import print_config
import json

print_config()

# TCIA collection name and segmentation type, both passed to TciaDataset
collection = "HCC-TACE-Seg"
seg_type = "SEG"

# Segmentation label names, each paired with an integer index; this mapping
# is given to LoadImaged through its `label_dict` argument.
label_dict = {
    "Liver": 0,
    "Tumor": 1,
    "vessels": 2,
    "aorta": 3,
}

# Per-sample preprocessing, built step by step and then chained:
#   1. load the "image" and "label" entries with the "PydicomReader" reader,
#   2. make both channel-first,
#   3. resample "image" so that it matches "label".
load_image_and_label = LoadImaged(
    keys=["image", "label"],
    reader="PydicomReader",
    label_dict=label_dict,
)
channels_first = EnsureChannelFirstd(keys=["image", "label"])
match_image_to_label = ResampleToMatchd(keys="image", key_dst="label")

transform = Compose([load_image_and_label, channels_first, match_image_to_label])

# Keyword arguments that are identical for the training and test datasets.
shared_dataset_kwargs = {
    "collection": collection,
    "transform": transform,
    "download": True,
    "seg_type": seg_type,
    "progress": True,
    "cache_rate": 0.0,
}

# Training section, stored under ../data/train.
# NOTE(review): download_len presumably caps how many series are fetched - confirm.
train_dataset = TciaDataset(
    root_dir="../data/train",
    section="training",
    download_len=5,
    val_frac=0.2,
    **shared_dataset_kwargs,
)

# Test section, stored under ../data/test with no validation fraction.
test_dataset = TciaDataset(
    root_dir="../data/test",
    section="test",
    download_len=2,
    val_frac=0.0,
    **shared_dataset_kwargs,
)

# Collect both datasets' datalists under the "training" / "testing" keys
data_list = dict(
    training=train_dataset.datalist,
    testing=test_dataset.datalist,
)

# Write the combined datalist to disk as JSON
datalist_file = "../auto3dseg_datalist.json"
with open(datalist_file, mode="w") as f:
    json.dump(data_list, f)

# Directory handed to AutoRunner as its work_dir
work_dir = "../data/auto3dseg"

# Task description for Auto3DSeg; also exported to a .yaml file below.
# NOTE(review): "dataroot" is set to the working directory here, while the
# datasets were created under ../data/train and ../data/test - confirm this
# is the directory the datalist paths should be resolved against.
input_config = dict(
    name="HCC-TACE-Seg",
    task="segmentation",
    modality="CT",
    datalist="../auto3dseg_datalist.json",
    dataroot=work_dir,
)

# Export the configuration to a .yaml file
config_yaml = "../auto3dseg_config.yaml"
ConfigParser.export_config_file(input_config, config_yaml)

# Training overrides passed to AutoRunner.set_training_params.
# Every key is listed exactly once: a repeated key in a dict literal is
# silently collapsed to its last value, so "num_images_per_batch" carries the
# value that was actually in effect (2).
train_param = {
    "num_epochs_per_validation": 1,
    "num_images_per_batch": 2,
    "num_epochs": 1,
    "num_warmup_epochs": 1,
}

# Build the AutoRunner with analysis and training switched on and the
# "swinunetr" algorithm selected, apply the training overrides, request a
# single fold, then start the pipeline.
runner = AutoRunner(
    work_dir=work_dir,
    input=input_config,
    algos="swinunetr",
    analyze=True,
    train=True,
)
runner.set_training_params(train_param)
runner.set_num_fold(num_fold=1)
runner.run()

The script fails to run due to a RuntimeError with the following stack trace:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[36], line 13
     11 runner.set_training_params(train_param)
     12 runner.set_num_fold(num_fold = 1)
---> 13 runner.run()

File c:\Users\20191678\AppData\Local\anaconda3\envs\ITP\Lib\site-packages\monai\apps\auto3dseg\auto_runner.py:743, in AutoRunner.run(self)
    739 logger.info("Running data analysis...")
    740 da = DataAnalyzer(
    741     self.datalist_filename, self.dataroot, output_path=self.datastats_filename, **self.analyze_params
    742 )
--> 743 da.get_all_case_stats()
    745 da = None  # type: ignore
    746 torch.cuda.empty_cache()

File c:\Users\20191678\AppData\Local\anaconda3\envs\ITP\Lib\site-packages\monai\apps\auto3dseg\data_analyzer.py:230, in DataAnalyzer.get_all_case_stats(self, key, transform_list)
    228             result_bycase[DataStatsKeys.BY_CASE].extend(_[DataStatsKeys.BY_CASE])
    229 else:
--> 230     result_bycase = self._get_all_case_stats(0, 1, None, key, transform_list)
    232 summarizer = SegSummarizer(
    233     self.image_key,
    234     self.label_key,
   (...)
    239     histogram_only=self.histogram_only,
...
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\20191678\AppData\Local\anaconda3\envs\ITP\Lib\site-packages\monai\transforms\transform.py", line 171, in apply_transform
    raise RuntimeError(f"applying transform {transform}") from e
RuntimeError: applying transform <monai.transforms.compose.Compose object at 0x000002688A1774D0>

It seems that the internal DataAnalyzer module is trying to apply a transform that is None by default. What is expected here?

NOTE: I am running Python 3.11.8 with the conda env dependencies specified in the MONAI repo.