Hey guys,
I'm trying to use torch.utils.data.ConcatDataset to concatenate two datasets for the training process.
Goal:
- Keep the original dataset with no transformations
- Create a second dataset with transformations
Doing:
- Create a transformation with transform = A.Compose([...])
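For context, train_transform looks roughly like this. The concrete augmentations here are placeholders; the detail that matters later is that the pipeline ends with ToTensorV2, so transformed samples come back as torch tensors:

import albumentations as A
from albumentations.pytorch import ToTensorV2

# Placeholder augmentations -- the key part is ToTensorV2 at the end,
# which converts the HWC numpy image into a CHW torch.Tensor.
train_transform = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.ColorJitter(p=0.4),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format='yolo', min_visibility=0.4),
)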
- Create a Dataset class

import os.path as osp

import numpy as np
import pandas as pd
import torch
from PIL import Image

class YOLODataset(torch.utils.data.Dataset):
    def __init__(self, csv_file, img_dir, label_dir, anchors,
                 scales=[13, 26, 52],
                 n_classes=3,
                 transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transform = transform
        self.scales = scales
        self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2])  # (3*3, 2)
        self.n_anchors_per_scale = self.anchors.size(0) // len(scales)
        self.n_classes = n_classes
        self.ignore_iou_thresh = 0.5

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        # Get metadata
        img_path = osp.join(self.img_dir, self.annotations.iloc[idx, 0])
        label_path = osp.join(self.label_dir, self.annotations.iloc[idx, 1])
        # Load img and bboxes
        img = np.array(Image.open(img_path).convert('RGB'))
        bboxes = np.loadtxt(fname=label_path, ndmin=2)
        bboxes = np.roll(bboxes, 4, axis=1).tolist()  # (x, y, w, h, class)
        # Apply albumentations transformation
        if self.transform is not None:
            augmentations = self.transform(image=img, bboxes=bboxes)
            img = augmentations['image']
            bboxes = augmentations['bboxes']
        # Ground-truth cell placeholders, one tensor per scale
        targets = [torch.zeros((self.n_anchors_per_scale, scale, scale, 6))
                   for scale in self.scales]
        if len(bboxes) == 0:
            return img, targets
        # Create ground truth across the three scales
        # ====================================================================
        # - At least one cell will be assigned to each object
        # - No more than one cell per scale can be assigned to the same object
        # - Cells in different scales might get assigned to the same object
        # - Cell format: (x_cell, y_cell, w_cell, h_cell, conf, class)
        iou_anchors = iou_wh(                # (N, 9); iou_wh shown below
            torch.tensor(bboxes)[..., 2:4],  # (N, 2)
            self.anchors                     # (9, 2)
        )
        # Assign cells to each object, starting from the most likely
        # anchor (highest IoU)
        anchor_orders = iou_anchors.argsort(descending=True, dim=-1)
        for box_idx, anchor_order in enumerate(anchor_orders):
            x, y, w, h, cls = bboxes[box_idx]
            # One flag per scale: is this object already assigned in that scale?
            has_obj_in_scale = [False] * len(self.scales)
            for anchor_idx in anchor_order:
                scale_idx = anchor_idx // self.n_anchors_per_scale
                scale_anchor_idx = anchor_idx % self.n_anchors_per_scale
                scale = self.scales[scale_idx]
                i, j = int(scale * y), int(scale * x)
                cell_has_obj = targets[scale_idx][scale_anchor_idx, i, j, 4]
                if (
                    not cell_has_obj
                    and not has_obj_in_scale[scale_idx]
                ):
                    targets[scale_idx][scale_anchor_idx, i, j, 4] = 1
                    x_offset, y_offset = (x * scale - j), (y * scale - i)
                    w_cell, h_cell = (w * scale), (h * scale)
                    coord = torch.tensor([x_offset, y_offset, w_cell, h_cell])
                    targets[scale_idx][scale_anchor_idx, i, j, :4] = coord
                    targets[scale_idx][scale_anchor_idx, i, j, 5] = int(cls)
                    has_obj_in_scale[scale_idx] = True
                elif (
                    not cell_has_obj
                    and has_obj_in_scale[scale_idx]
                    and iou_anchors[box_idx, anchor_idx] > self.ignore_iou_thresh
                ):
                    # Ignore this prediction (does not count towards the loss)
                    targets[scale_idx][scale_anchor_idx, i, j, 4] = -1
        return img, targets
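For completeness, iou_wh is my width/height-only IoU helper (boxes treated as concentric); it is essentially the standard YOLOv3 anchor-matching helper, roughly:

def iou_wh(boxes_wh, anchors_wh):
    # IoU between (w, h) pairs, assuming boxes share the same center.
    # boxes_wh: (N, 2), anchors_wh: (M, 2) -> (N, M)
    inter = torch.min(boxes_wh[:, None, :], anchors_wh[None, :, :]).prod(-1)
    union = boxes_wh.prod(-1)[:, None] + anchors_wh.prod(-1)[None, :] - inter
    return inter / union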
- Create a raw_dataset & train_dataset

raw_dataset = YOLODataset(
    csv_file=config['dataset']['train']['csv'],
    img_dir=config['dataset']['train']['img_root'],
    label_dir=config['dataset']['train']['label_root'],
    anchors=self.anchors,
    scales=wandb.config['scales'],
    n_classes=config['dataset']['n_classes'],
    transform=None)

train_dataset = YOLODataset(
    csv_file=config['dataset']['train']['csv'],
    img_dir=config['dataset']['train']['img_root'],
    label_dir=config['dataset']['train']['label_root'],
    anchors=self.anchors,
    scales=wandb.config['scales'],
    n_classes=config['dataset']['n_classes'],
    transform=train_transform)
- Concatenate both datasets with ConcatDataset and build the DataLoader

dataset = torch.utils.data.ConcatDataset([train_dataset, raw_dataset])
self.train_loader = DataLoader(
    dataset=dataset,
    batch_size=wandb.config['batch_size'],
    num_workers=wandb.config['num_workers'],
    pin_memory=True, shuffle=True, drop_last=False)
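As a quick sanity check (hypothetical snippet; both datasets constructed as above), the two datasets seem to return different types for the image, which might be relevant here:

img_aug, _ = train_dataset[0]
img_raw, _ = raw_dataset[0]
print(type(img_aug))  # torch.Tensor, if train_transform ends with ToTensorV2
print(type(img_raw))  # numpy.ndarray, since transform=None skips the conversion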
Result:
- This leads to the following error:
Traceback (most recent call last):
File "C:\Users\IKT\Documents\GitHub\YOLOv3_Masterarbeit_mth\main.py", line 73, in <module>
main(args['config'])
File "C:\Users\IKT\Documents\GitHub\YOLOv3_Masterarbeit_mth\main.py", line 64, in main
agent.train()
File "C:\Users\IKT\Documents\GitHub\YOLOv3_Masterarbeit_mth\yolov3.py", line 330, in train
self._train_one_epoch()
File "C:\Users\IKT\Documents\GitHub\YOLOv3_Masterarbeit_mth\yolov3.py", line 387, in _train_one_epoch
for batch_idx, (imgs, targets) in enumerate(loop):
File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\tqdm\std.py", line 1185, in __iter__
for obj in iterable:
File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\dataloader.py", line 521, in __next__
data = self._next_data()
File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\dataloader.py", line 1203, in _next_data
return self._process_data(data)
File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\dataloader.py", line 1229, in _process_data
data.reraise()
File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\_utils.py", line 425, in reraise
raise self.exc_type(msg)
AttributeError: Caught AttributeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\fetch.py", line 47, in fetch
return self.collate_fn(data)
File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\collate.py", line 84, in default_collate
return [default_collate(samples) for samples in transposed]
File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\collate.py", line 84, in <listcomp>
return [default_collate(samples) for samples in transposed]
File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\collate.py", line 53, in default_collate
numel = sum([x.numel() for x in batch])
File "C:\Users\IKT\anaconda3\envs\wandb\lib\site-packages\torch\utils\data\_utils\collate.py", line 53, in <listcomp>
numel = sum([x.numel() for x in batch])
AttributeError: 'numpy.ndarray' object has no attribute 'numel'
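My suspicion (not verified yet): raw_dataset has transform=None, so its images stay numpy arrays, while train_dataset returns tensors via ToTensorV2, and default_collate then chokes on the mixed batch. If that is the cause, one possible fix would be to give the raw dataset a minimal transform that only converts to tensors without augmenting, e.g.:

raw_transform = A.Compose(
    [ToTensorV2()],  # no augmentation, just numpy -> torch conversion
    bbox_params=A.BboxParams(format='yolo'),
)

raw_dataset = YOLODataset(
    csv_file=config['dataset']['train']['csv'],
    img_dir=config['dataset']['train']['img_root'],
    label_dir=config['dataset']['train']['label_root'],
    anchors=self.anchors,
    scales=wandb.config['scales'],
    n_classes=config['dataset']['n_classes'],
    transform=raw_transform)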
Could someone please help me with that?
Thanks in advance!