ConcatDataset Error: AttributeError: 'numpy.ndarray' object has no attribute 'numel'

Hey guys,

I am trying to use torch.utils.data.ConcatDataset to concatenate two Datasets for the training process.

Goal:

  • Keep the original Dataset with no transformations

  • Create a second dataset with transformations

Doing:

  • Create a transformation with transform = A.Compose([...])

  • Create a Dataset class

class YOLODataset:
    """Detection dataset that yields an image plus one target grid per scale.

    Each row of the CSV file names an image file (column 0) and a label file
    (column 1). Label files are read with ``np.loadtxt`` and rolled into
    ``(x, y, w, h, class)`` rows; coordinates are presumably normalised to
    [0, 1] because they are multiplied by the grid size -- TODO confirm.

    NOTE: the image is returned exactly as the transform leaves it. With
    ``transform=None`` (or a transform that does not convert to a tensor) it
    stays a NumPy array. Mixing such samples with tensor samples in one batch
    makes the default collate function fail with
    ``'numpy.ndarray' object has no attribute 'numel'``.
    """

    def __init__(self, csv_file, img_dir, label_dir, anchors,
                 scales=(13, 26, 52),
                 n_classes=3,
                 transform=None):
        """Read the annotation index and store the grid/anchor configuration.

        Args:
            csv_file: path of the CSV listing image and label file names.
            img_dir: directory that the image file names are relative to.
            label_dir: directory that the label file names are relative to.
            anchors: three per-scale lists of (w, h) anchors; they are
                concatenated into a single (3*3, 2) tensor.
            scales: grid sizes, one per prediction scale.
            n_classes: number of object classes.
            transform: optional callable invoked as
                ``transform(image=..., bboxes=...)`` returning a mapping with
                the keys ``'image'`` and ``'bboxes'``.
        """
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transform = transform
        # Copy so that instances never share (or mutate) the caller's or the
        # default sequence.
        self.scales = list(scales)
        self.anchors = torch.tensor(anchors[0]+anchors[1]+anchors[2]) # (3*3, 2)
        self.n_anchors_per_scale = self.anchors.size(0) // len(self.scales)
        self.n_classes = n_classes
        self.ignore_iou_thresh = 0.5

    def __len__(self):
        """Return the number of rows in the annotation CSV."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load sample ``idx`` and build its ground-truth grids.

        Returns:
            A pair ``(img, targets)``. ``targets`` is a list with one tensor
            per scale, shaped ``(n_anchors_per_scale, scale, scale, 6)`` with
            cell format ``(x_cell, y_cell, w_cell, h_cell, conf, class)``.
            ``conf`` is 1 for an assigned cell, -1 for a cell to be ignored
            by the loss and 0 otherwise.
        """
        # Get metadata
        img_path = osp.join(self.img_dir, self.annotations.iloc[idx, 0])
        label_path = osp.join(self.label_dir, self.annotations.iloc[idx, 1])
        # Load img and bboxes; the context manager closes the file handle
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        bboxes = np.loadtxt(fname=label_path, ndmin=2)
        bboxes = np.roll(bboxes, 4, axis=1).tolist() # (x, y, w, h, class)
        # Apply albumentation transformation
        if self.transform is not None:
            augmentations = self.transform(image=img, bboxes=bboxes)
            img = augmentations['image']
            bboxes = augmentations['bboxes']
        # Groundtruth cells placeholder
        targets = [ torch.zeros((self.n_anchors_per_scale, scale, scale, 6))
                    for scale in self.scales ]
        if len(bboxes) == 0:
            return img, targets
        # Create groundtruth across three scales
        # ====================================================================
        # - At least one cell will be assigned to an object
        # - No more than one cell can be assigned to an object in the same scale
        # - Cells in different scale might get assigned to an objects
        # - Cell Format: (x_cell, y_cell, w_cell, h_cell, conf, class)
        iou_anchors = iou_wh( # (N, 9)
                            torch.tensor(bboxes)[..., 2:4], # (N, 2)
                            self.anchors                    # (9, 2)
                            )
        # Assign cells to each object from the most likely one (highest IoU)
        anchor_orders = iou_anchors.argsort(descending=True, dim=-1)
        # A separate loop name keeps the ``idx`` parameter intact.
        for box_idx, anchor_order in enumerate(anchor_orders):
            x, y, w, h, cls = bboxes[box_idx]
            # Check Flag
            has_obj_in_scale = [False]*len(self.scales)
            # Plain ints avoid tensor floor-division and tensor list indexing.
            for anchor_idx in anchor_order.tolist():
                scale_idx = anchor_idx // self.n_anchors_per_scale
                scale_anchor_idx = anchor_idx % self.n_anchors_per_scale
                scale = self.scales[scale_idx]
                # A centre lying exactly on the far border (x or y == 1.0)
                # would index one past the grid, so clamp to the last cell.
                i = min(int(scale*y), scale - 1)
                j = min(int(scale*x), scale - 1)
                cell_has_obj = targets[scale_idx][scale_anchor_idx, i, j, 4]
                if (
                    not cell_has_obj
                    and not has_obj_in_scale[scale_idx]
                ):
                    targets[scale_idx][scale_anchor_idx, i, j, 4] = 1
                    x_offset, y_offset = (x*scale-j), (y*scale-i)
                    w_cell, h_cell = (w*scale), (h*scale)
                    coord = torch.tensor([x_offset, y_offset, w_cell, h_cell])
                    targets[scale_idx][scale_anchor_idx, i, j, :4] = coord
                    targets[scale_idx][scale_anchor_idx, i, j, 5] = int(cls)
                    has_obj_in_scale[scale_idx] = True
                elif (
                    not cell_has_obj
                    and has_obj_in_scale[scale_idx]
                    and iou_anchors[box_idx, anchor_idx] > self.ignore_iou_thresh
                ):
                    # For ignoring prediction (not count for loss)
                    targets[scale_idx][scale_anchor_idx, i, j, 4] = -1
        return img, targets

  • Create a raw_dataset & train_dataset
# Constructor arguments common to both datasets; only `transform` differs.
shared_dataset_args = dict(
    csv_file=config['dataset']['train']['csv'],
    img_dir=config['dataset']['train']['img_root'],
    label_dir=config['dataset']['train']['label_root'],
    anchors=self.anchors,
    scales=wandb.config['scales'],
    n_classes=config['dataset']['n_classes'],
)

# Original training data without any transformation.
raw_dataset = YOLODataset(transform=None, **shared_dataset_args)

# Same files, passed through `train_transform`.
train_dataset = YOLODataset(transform=train_transform, **shared_dataset_args)
  • ConcatDataset
# Concatenate the transformed dataset followed by the untransformed one.
dataset = torch.utils.data.ConcatDataset([train_dataset, raw_dataset])

# Batch size and worker count come from the wandb run configuration.
loader_options = dict(
    batch_size=wandb.config['batch_size'],
    num_workers=wandb.config['num_workers'],
    pin_memory=True,
    shuffle=True,
    drop_last=False,
)
self.train_loader = DataLoader(dataset, **loader_options)

Result:

  • leads to the following Error
Traceback (most recent call last):
  File "C:\Users\IKT\Documents\GitHub\YOLOv3_Masterarbeit_mth\main.py", line 73, in <module>
    main(args['config'])
  File "C:\Users\IKT\Documents\GitHub\YOLOv3_Masterarbeit_mth\main.py", line 64, in main
    agent.train()
  File "C:\Users\IKT\Documents\GitHub\YOLOv3_Masterarbeit_mth\yolov3.py", line 330, in train
    self._train_one_epoch()
  File "C:\Users\IKT\Documents\GitHub\YOLOv3_Masterarbeit_mth\yolov3.py", line 387, in _train_one_epoch
    for batch_idx, (imgs, targets) in enumerate(loop):
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\tqdm\std.py", line 1185, in __iter__
    for obj in iterable:
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\dataloader.py", line 521, in __next__
    data = self._next_data()
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\dataloader.py", line 1203, in _next_data
    return self._process_data(data)
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\dataloader.py", line 1229, in _process_data
    data.reraise()
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\_utils.py", line 425, in reraise
    raise self.exc_type(msg)
AttributeError: Caught AttributeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\fetch.py", line 47, in fetch
    return self.collate_fn(data)
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\collate.py", line 84, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\collate.py", line 84, in <listcomp>
    return [default_collate(samples) for samples in transposed]
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\collate.py", line 53, in default_collate
    numel = sum([x.numel() for x in batch])
  File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\collate.py", line 53, in <listcomp>
    numel = sum([x.numel() for x in batch])
AttributeError: 'numpy.ndarray' object has no attribute 'numel'

Could someone please help me with that?

Thanks in advance!

1 Like

.numel() is defined for PyTorch tensors, not numpy arrays. Based on your error message it seems that you are using numpy arrays inside your Dataset, which then fails in:

numel = sum([x.numel() for x in batch])

Assuming the __getitem__ returns these numpy arrays (I don’t have a clue which types these objects have), convert them to tensors via tensor = torch.from_numpy(array).

1 Like

Thanks @ptrblck, I realized the question was quite basic and I solved it!

You are awesome!

I wish u a fantastic day!

1 Like