Training epochs are so slow

OBouldjedri · February 1, 2022, 10:10pm

I am working on a dataset that I extracted from JSON files, but the training is so slow I was wondering what could be wrong with my dataset definition:

class JsonDataset(torch.utils.data.Dataset):
def init(self, files):
super(JsonDataset).init()
self.files = files

    self.data_full_list=[]
    for i in range(len(self.files)):
       
        liste_files_current_act=self.files[i] 

        for j in range(len(liste_files_current_act)):

             with open(liste_files_current_act[j]) as file:
                 for line in file:
                   
                    line_tmp = file.readline()
                  
                    if line_tmp == None or line_tmp == '':
                        print('I got a null or empty string value for data in a file')
                        if line_tmp== None:
                            print('it is NONE')
                        if line_tmp== '':
                            print('it is a blank')
                        
                    else:
                        res_line= json.loads(str(line_tmp))
                    
                    data_line=res_line['data']
      
                    self.data_full_list.append([data_line, i])
          
def __len__(self):
    return(len(self.data_full_list))    

               
def __getitem__(self, idx):
    
    start = time.time()
    
    data,label=self.data_full_list[idx]
   
    data=data[0:164]
    
    data=torch.FloatTensor(data)
   
    if timestep=='5sec':
        data=data.repeat(250,1)
    
    if timestep=='10sec':
        
        data=data.repeat(500,1)    
    
    if timestep=='15sec':
        data=data.repeat(750,1)  
        
        
    end1 = time.time()
    print('time point 1',end1 - start)    
    
    data_uwb1=data
    data_uwb2=data
    data_uwb3=data

    np_UWB3 = data_uwb3.cpu().detach().numpy()
    np_UWB2 = data_uwb2.cpu().detach().numpy()
    np_UWB1 = data_uwb1.cpu().detach().numpy()
    
    np_fusion=np.stack((np_UWB1,np_UWB2,np_UWB3), axis=0)
    
    
    
    data=torch.from_numpy(np_fusion)
    end2 = time.time()
    print('time point 2',end2 - start)
     
    return(data,label)

it seems to me that once the training enter the training loop it takes much time to start the first iteration of this loop:
for data, target in train_loader: