I am working on a dataset that I extracted from JSON files, but the training is so slow I was wondering what could be wrong with my dataset definition:
class JsonDataset(torch.utils.data.Dataset):
def init(self, files):
super(JsonDataset).init()
self.files = files
self.data_full_list=[]
for i in range(len(self.files)):
liste_files_current_act=self.files[i]
for j in range(len(liste_files_current_act)):
with open(liste_files_current_act[j]) as file:
for line in file:
line_tmp = file.readline()
if line_tmp == None or line_tmp == '':
print('I got a null or empty string value for data in a file')
if line_tmp== None:
print('it is NONE')
if line_tmp== '':
print('it is a blank')
else:
res_line= json.loads(str(line_tmp))
data_line=res_line['data']
self.data_full_list.append([data_line, i])
def __len__(self):
return(len(self.data_full_list))
def __getitem__(self, idx):
start = time.time()
data,label=self.data_full_list[idx]
data=data[0:164]
data=torch.FloatTensor(data)
if timestep=='5sec':
data=data.repeat(250,1)
if timestep=='10sec':
data=data.repeat(500,1)
if timestep=='15sec':
data=data.repeat(750,1)
end1 = time.time()
print('time point 1',end1 - start)
data_uwb1=data
data_uwb2=data
data_uwb3=data
np_UWB3 = data_uwb3.cpu().detach().numpy()
np_UWB2 = data_uwb2.cpu().detach().numpy()
np_UWB1 = data_uwb1.cpu().detach().numpy()
np_fusion=np.stack((np_UWB1,np_UWB2,np_UWB3), axis=0)
data=torch.from_numpy(np_fusion)
end2 = time.time()
print('time point 2',end2 - start)
return(data,label)
it seems to me that once the training enter the training loop it takes much time to start the first iteration of this loop:
for data, target in train_loader: