Converting annotation based data for multiple inputs

I want to convert the below code into multiple input type,i.e I want to take in all the images for a subject at a once and further have to send it through model.

class Dataset(data.Dataset):
    def __init__(self,img_dir,data,valid_id,max_len,img_height,img_width,transform):
       #here valid id is the ids of image to take input
        self.img_dir = img_dir
        self.initial_dataset(data,valid_id)
        self.max_len = max_len
        self.img_height = img_height
        self.img_width = img_width
        self.transform = transform

    def initial_dataset(self,data,valid_id):
        self.fixation = []
        self.label = []
        self.img_id = []
        self.img_size = []

        for img_id in data.keys():
            if not img_id in valid_id:
                continue
            for group_label, group in enumerate(['ct','ad']):
                self.fixation.extend(data[img_id][group]['fixation'])
                self.img_id.extend([os.path.join(self.img_dir,str(img_id)+'.png')]*len(data[img_id][group]['fixation']))
                self.label.extend([group_label]*len(data[img_id][group]['fixation']))
                self.img_size.extend([data[img_id]['img_size']]*len(data[img_id][group]['fixation']))

    def get_fix(self,idx):
        fixs = self.fixation[idx]
        y_lim, x_lim = self.img_size[idx]
        fixation = []
        invalid = 0
        # only consider the first k fixations
        for i in range(self.max_len):
            if i+1 <= len(fixs):
                y_fix, x_fix = fixs[i]
                x_fix = int(x_fix*(self.img_width/float(x_lim))/32)
                y_fix = int(y_fix*(self.img_height/float(y_lim))/33)
                if x_fix >=0 and y_fix>=0:
                    fixation.append(y_fix*25 + x_fix) # get the corresponding index of fixation on the downsampled feature map
                else:
                    invalid += 1
            else:
                fixation.append(0) # pad if necessary
        for i in range(invalid):
            fixation.append(0)
        fixation = torch.from_numpy(np.array(fixation).astype('int'))
        return fixation

    def __getitem__(self,index):
        img = Image.open(self.img_id[index])
        if self.transform is not None:
            img = self.transform(img)
        label = torch.FloatTensor([self.label[index]])
        fixation = self.get_fix(index)
        return img, label, fixation

    def __len__(self,):
        return len(self.fixation)

Could you explain, what the fixation is supposed to do and where you are stuck at the moment, please?

@ptrblck
for the above code there are eye fixations of subjects ‘idx’ corresponding to image (total 100 images) and the above code returns the dataset which is loaded using “torch.utils.data.DataLoader”.

I want to pass all the images and their respective fixations for one ‘idx’ all at a time.

for j, (img,target,fix) in enumerate(trainloader):
                if len(img) < args.batch_size:
                    continue
                img, target, fix = Variable(img), Variable(target.type(torch.FloatTensor)), Variable(fix,requires_grad=False)
                img, target, fix = img.cuda(), target.cuda(), fix.cuda()
                optimizer.zero_grad()

                pred = model(img,fix)
 

The above code shows how I pass images on by one to model, instead I want to sent it on ‘idx’ and all at a time.
So I am unable to figure out how to modify my dataset and further the above part of code to do so.

If I understand the use case correctly, you would like to load all samples for a specific fix value?
If that’s the case you could either use a custom sampler, which returns batch samples sorted by the fix values. Alternatively, you could probably use different datasets for each fix and load the batches from these datasets as needed.
Is your dataset containing the same amount for each fix value? If not, this would mean that the batch size would be different, but I assume this fits your use case?