Error in self.dataset_iter = iter(dataset)

I am working on a json dataset, I manege to get the data into this shape :

[[[…],label 1],[[…].label2],…]

but I still cannot iterate tghough the data set this is my custum data set:

class JsonDataset(IterableDataset):
def init(self, files):
super(JsonDataset).init()
self.files = files

    self.data_full_list=[]
    for i in range(len(self.files)):
       
        liste_files_current_act=self.files[i]  ### list of files of this activity
       
        for j in range(len(liste_files_current_act)):
            
            with open(liste_files_current_act[j]) as file:
                 for line in file:
                   
                    line_tmp = file.readline()
                  
                    if line_tmp == None or line_tmp == '':
                        print('I got a null or empty string value for data in a file')
                        if line_tmp== None:
                            print('it is NONE')
                        if line_tmp== '':
                            print('it is a blank')
                        #input('none or blanck')
                    else:
                        res_line= json.loads(str(line_tmp))
                    
                    data_line=res_line['data']
      
                    self.data_full_list.append([data_line, i])
                
                     


    print('done')
    print(len(self.data_full_list))
 
def _len_(self):
    return(len(self.data_full_list))    

               
def __getitem__(self, idx):
    print(len(self.data_full_list))
    data,label=self.data_full_list[idx]
    
    
    
    data=torch.FloatTensor(data)
    print('type of data is ',type(data))
    print()
    print('label ',type(label))
    input()
    
   
    return(data,label)

I still get this error:

self.dataset_iter = iter(dataset)
line 146, in iter
raise NotImplementedError

NotImplementedError

IterableDatset expects an __iter__ method so if you want to use __getitem__ use torch.utils.data.Dataset instead.

1 Like

I am still having an error:

return iter(range(len(self.data_source)))

TypeError: object of type ‘JsonDataset’ has no len()

Your code snippet seems to have a typo in __len__(self) (note the two underscores while your code uses single underscores).

1 Like