ConcatDataset Error: AttributeError: 'numpy.ndarray' object has no attribute 'numel'

Hey guys,

trying that torch.utils.data.ConcatDataset function to concatenate two Datasets together for the training process.

Goal:

  • Keep the original Dataset with no transformations

  • Create a second dataset with transformations

Doing:

  • Create a transformation with transform = A.Compose([...])

  • Create a Dataset class

class YOLODataset:

    def __init__(self, csv_file, img_dir, label_dir, anchors,
                scales=[13, 26, 52],
                n_classes=3,
                transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transform = transform
        self.scales = scales
        self.anchors = torch.tensor(anchors[0]+anchors[1]+anchors[2]) # (3*3, 2)
        self.n_anchors_per_scale = self.anchors.size(0) // len(scales)
        self.n_classes = n_classes
        self.ignore_iou_thresh = 0.5

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        # Get metadata
        img_path = osp.join(self.img_dir, self.annotations.iloc[idx, 0])
        label_path = osp.join(self.label_dir, self.annotations.iloc[idx, 1])
        # Load img and bboxes
        img = np.array(Image.open(img_path).convert('RGB'))
        #bboxes = np.loadtxt(fname=label_path, delimiter= " " , ndmin=2)
        bboxes = np.loadtxt(fname=label_path, ndmin=2)
        bboxes = np.roll(bboxes, 4, axis=1).tolist() # (x, y, w, h, class)
        # Apply albumentation transformation
        if self.transform is not None:
            augmentations = self.transform(image=img, bboxes=bboxes)
            img = augmentations['image']
            bboxes = augmentations['bboxes']
        # Groundtruth cells placeholder
        targets = [ torch.zeros((self.n_anchors_per_scale, scale, scale, 6))
                    for scale in self.scales ]
        if len(bboxes) == 0:
            return img, targets
        # Create groundtruth across three scales
        # ====================================================================
        # - At least one cell will be assigned to an object
        # - No more than one cell can be assigned to an object in the same scale
        # - Cells in different scale might get assigned to an objects
        # - Cell Format: (x_cell, y_cell, w_cell, h_cell, conf, class)
        iou_anchors = iou_wh( # (N, 9)
                            torch.tensor(bboxes)[..., 2:4], # (N, 2)
                            self.anchors                    # (9, 2)
                            )
        # Assign cells to each object from the most likely one (highest IoU)
        anchor_orders = iou_anchors.argsort(descending=True, dim=-1)
        for idx, anchor_order in enumerate(anchor_orders):
            x, y, w, h, cls = bboxes[idx]
            # Check Flag
            has_obj_in_scale = [False]*len(self.scales)
            for anchor_idx in anchor_order:
                scale_idx = anchor_idx // self.n_anchors_per_scale
                scale_anchor_idx = anchor_idx % self.n_anchors_per_scale
                scale = self.scales[scale_idx]
                i, j = int(scale*y), int(scale*x)
                cell_has_obj = targets[scale_idx][scale_anchor_idx, i, j, 4]
                if (
                    not cell_has_obj
                    and not has_obj_in_scale[scale_idx]
                ):
                    targets[scale_idx][scale_anchor_idx, i, j, 4] = 1
                    x_offset, y_offset = (x*scale-j), (y*scale-i)
                    w_cell, h_cell = (w*scale), (h*scale)
                    coord = torch.tensor([x_offset, y_offset, w_cell, h_cell])
                    targets[scale_idx][scale_anchor_idx, i, j, :4] = coord
                    targets[scale_idx][scale_anchor_idx, i, j, 5] = int(cls)
                    has_obj_in_scale[scale_idx] = True
                elif (
                    not cell_has_obj
                    and has_obj_in_scale[scale_idx]
                    and iou_anchors[idx, anchor_idx] > self.ignore_iou_thresh
                ):
                    # For ignoring prediction (not count for loss)
                    targets[scale_idx][scale_anchor_idx, i, j, 4] = -1
        return img, targets

  • Create a raw_dataset & train_dataset
raw_dataset = YOLODataset(
                            csv_file = config['dataset']['train']['csv'],
                            img_dir = config['dataset']['train']['img_root'],
                            label_dir = config['dataset']['train']['label_root'],
                            anchors = self.anchors,
                            #anchors = config['dataset']['anchors'],
                            scales = wandb.config['scales'],
                            #scales = self.log_config.scales,
                            n_classes = config['dataset']['n_classes'],
                            transform=None)
        
train_dataset = YOLODataset(
                            csv_file = config['dataset']['train']['csv'],
                            img_dir = config['dataset']['train']['img_root'],
                            label_dir = config['dataset']['train']['label_root'],
                            anchors = self.anchors,
                            #anchors = config['dataset']['anchors'],
                            scales = wandb.config['scales'],
                            #scales = self.log_config.scales,
                            n_classes = config['dataset']['n_classes'],
                            transform=train_transform)
  • ConcatDataset
dataset = torch.utils.data.ConcatDataset([train_dataset,raw_dataset])
self.train_loader = DataLoader(
                                dataset = dataset,
                                #dataset = train_dataset,
                                batch_size = wandb.config['batch_size'], #self.log_config.batch_size,
                                num_workers = wandb.config['num_workers'], #self.log_config.num_workers,
                                pin_memory=True, shuffle=True, drop_last=False)

Result:

  • leads to the following Error
Traceback (most recent call last):
  File "C:\Users\IKT\Documents\GitHub\YOLOv3_Masterarbeit_mth\main.py", line 73, in <module>
    main(args['config'])
  File "C:\Users\IKT\Documents\GitHub\YOLOv3_Masterarbeit_mth\main.py", line 64, in main
    agent.train()
  File "C:\Users\IKT\Documents\GitHub\YOLOv3_Masterarbeit_mth\yolov3.py", line 330, in train
    self._train_one_epoch()
  File "C:\Users\IKT\Documents\GitHub\YOLOv3_Masterarbeit_mth\yolov3.py", line 387, in _train_one_epoch
    for batch_idx, (imgs, targets) in enumerate(loop):
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\tqdm\std.py", line 1185, in __iter__
    for obj in iterable:
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\dataloader.py", line 521, in __next__
    data = self._next_data()
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\dataloader.py", line 1203, in _next_data
    return self._process_data(data)
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\dataloader.py", line 1229, in _process_data
    data.reraise()
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\_utils.py", line 425, in reraise
    raise self.exc_type(msg)
AttributeError: Caught AttributeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\fetch.py", line 47, in fetch
    return self.collate_fn(data)
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\collate.py", line 84, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\collate.py", line 84, in <listcomp>
    return [default_collate(samples) for samples in transposed]
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\collate.py", line 53, in default_collate
    numel = sum([x.numel() for x in batch])
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\collate.py", line 53, in <listcomp>
    numel = sum([x.numel() for x in batch])
AttributeError: 'numpy.ndarray' object has no attribute 'numel'

Could someone please help me with that?

Thanks in advance!

.numel() is defined for PyTorch tensors, not numpy arrays. Based on your error message it seems that you are using numpy arrays inside your Dataset, which then fails in:

numel = sum([x.numel() for x in batch])

Assuming the __getitem__ returns these numpy arrays (I don’t have a clue which types these objects have), conert them to tensors via tensor = torch.from_numpy(array).

Thanks @ptrblck, I realized the question was quite basic and i solved it!

You are awesome!

I wish u a fantastic day!

1 Like