TypeError: list indices must be integers or slices, not tuple in weighted random sampler

I have this function for weighted random sampler:

def make_weights_for_balanced_classes(train_dataset, nclasses):
    
    #find total number of sample videos
    samplecount = 0
    for frame in train_dataset:
        samplecount +=1
    print('n_Samples',samplecount)
    
    #find number of samples for each class

    #n_images = len(images)
    count_per_class = [0] * nclasses
    
    for _, label, file_name in train_dataset:
        count_per_class[label] +=1
        
    #Next, we need to find the weights for each class.
    weight_per_class = [0.] * nclasses
    
    for i in range(nclasses):
        weight_per_class[i] = 1 / float(count_per_class[i])
        
    #assign weight to each sample
    weights = [0] * samplecount
    for label, file_name in enumerate(train_dataset):
        weights[file_name] = weight_per_class[label]
    
    return weights

but I am always getting this error:

TypeError: list indices must be integers or slices, not tuple

my dataset is defined like this:

class loadedDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = sorted(os.listdir(self.root_dir))
        self.count = [len(os.listdir(self.root_dir + '/' + c)) for c in self.classes]
        #print('self.count')
        #print(self.count)
        self.acc_count = [self.count[0]]
        for i in range(1, len(self.count)):
                self.acc_count.append(self.acc_count[i-1] + self.count[i])
        # self.acc_count = [self.count[i] + self.acc_count[i-1] for i in range(1, len(self.count))]
        
     

    def __len__(self):
        l = np.sum(np.array([len(os.listdir(self.root_dir + '/' + c)) for c in self.classes]))
        return l
    
    def __getitem__(self, idx):
        label = 1
        for i in range(len(self.acc_count)):
            if idx < self.acc_count[i]:
                label = i
                
                break
           

        class_path = self.root_dir + '/' + self.classes[label] 

        if label:
            file_path = class_path + '/' + sorted(os.listdir(class_path))[idx-self.acc_count[label]]
        else:
            file_path = class_path + '/' + sorted(os.listdir(class_path))[idx]

        _, file_name = os.path.split(file_path)

        frames = []
        #self.file_path = file_path
        # print os.listdir(file_path)
        file_list = sorted(os.listdir(file_path))
        # print file_list

        # v: maximum translation in every step
        v = 2
        offset = 0
        for i, f in enumerate(file_list):
           
            #frame = Image.open(file_path + '/' + f)
            #frame = np.load(file_path + '/' + f, allow_pickle="True")
            #print("f")
           # print(f)
            npyfile = os.path.join(file_path + '/' + f)
            frame = torch.load(npyfile)
            
            #translation
            offset += random.randrange(-v, v)
            offset = min(offset, 3 * v)
            offset = max(offset, -3 * v)
            #frame = frame.transform(frame.size, Image.AFFINE, (1, 0, offset, 0, 1, 0))
            if self.transform is not None:
                frame = self.transform[0](frame)
            frames.append(frame)
            
            
            if len(frames) == 8:
                break
        
        while len(frames) < 8:
            frames.append(frame)
        
        #print(len(frames))
        return frames, label, file_name

and I have loaded my dataset like this:


train_dataset = loadedDataset(traindir)
val_dataset = loadedDataset(valdir)

Thank you in advance

Could you describe which line of code fails exactly and what all inputs to this call are?

Hello @ptrblck , I am getting the error from this line when I am trying to assign weight to each sample:


   for label, file_name in enumerate(train_dataset):
        weights[file_name] = weight_per_class[label]

My code for finding the number of samples for each class seems to be not working too:

count_per_class = [0] * nclasses    
for _, label, file_name in train_dataset:
       count_per_class[label] +=1

It can only output the correct number of samples for the first class

Could you check what label contains and print a few objects?

1 Like

it iteratively prints the label of each class like ‘0’ and ‘1’

I have rewritten my entire function to just use some counters to iterate through classes and folders of frames which worked for my case, but its not the best solution:

def make_weights_for_balanced_classes(train_dataset, nclasses):
    
    #find total number of sample videos
    samplecount = 0
    samplecount = len(train_dataset)
    print('n_Samples',samplecount)
    
    #find number of samples for each class
    count_per_class = [0] * nclasses    
    counter = 0
    for num_samples in train_dataset.count:
        print('num_samples', num_samples)
        count_per_class[counter] = num_samples
        counter+=1

    #find the weight for each c;ass
    weight_per_class = [0.] * nclasses
    
    counter_sample = 0
    for i in range(nclasses):
        print('range class', i)
        weight_per_class[i] = 1 / float(count_per_class[counter_sample])
        counter_sample+=1
    
    #assign weight to each sample
    counter_sample1 = 0
    weights = [0] * samplecount
    for frames, label, file_name in train_dataset:
        print('label', label)
        counter_filename = 0
        for frames, label, file_name in train_dataset:
            if label == counter_sample1:
                print('file_name', file_name)
                weights[counter_filename] = weight_per_class[counter_sample1]
                counter_filename+=1
            else:
                break
            #weights[file_name] = weight_per_class[counter_sample1]
        counter_sample1+=1

    return weights