Hi, I have a working data loader, but while training on the GPU cluster the average time per epoch is too long (approximately 25 minutes). I am training on PNG images with an average size of around 2500x5000 pixels. I want to speed up training — please suggest what more I can do to make it faster. The code of the data loader is attached below.
class MammographyCBISDDSM(Dataset):
    """CBIS-DDSM mammography dataset backed by a ';'-separated CSV index.

    Each CSV row is expected to hold the class name in column 9 and the
    image path in column 11 (positional, as in the original code).

    Args:
        excel_file (string): Path to the annotation file. NOTE: despite the
            name, it is parsed as a ';'-separated CSV, not an Excel workbook.
        category (string): 'Classification' for Benign/Malignant labels.
            'Subtypes' for subtype classification (label mapping for it is
            not implemented yet — see class_name_to_labels).
        transform (callable, optional): Optional transform applied to the
            (3, H, W) float32 image tensor.
    """

    def __init__(self, excel_file, category, transform=None):
        self.mammography = pd.read_csv(excel_file, sep=';')
        self.category = category
        self.transform = transform

    def __len__(self):
        # One sample per annotation row.
        return len(self.mammography)

    def class_name_to_labels(self, idx):
        """Map the class name in row ``idx`` to a numeric label.

        Returns:
            float: 0.0 for MALIGNANT, 1.0 for BENIGN,
            2.0 for BENIGN_WITHOUT_CALLBACK.

        Raises:
            ValueError: for an unknown class name or unsupported category
                (the original code raised UnboundLocalError here).
        """
        if self.category == 'Classification':
            class_name = self.mammography.iloc[idx, 9]
            if class_name == 'MALIGNANT':
                return 0.0
            if class_name == 'BENIGN':
                return 1.0
            if class_name == 'BENIGN_WITHOUT_CALLBACK':
                return 2.0
            raise ValueError(f'Unknown class name: {class_name!r}')
        raise ValueError(f'Unsupported category: {self.category!r}')

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_path = self.mammography.iloc[idx, 11]
        # BUG FIX: cv2.imread returns a plain numpy array, so the original
        # `.pixel_array` access (a pydicom leftover) raised AttributeError.
        # Read as single-channel grayscale so the channel repeat below
        # produces the intended (3, H, W) tensor (the default BGR read is
        # (H, W, 3), which repeat(3, 1, 1) would mangle into (3, H, 3W)).
        image_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image_array is None:
            # cv2.imread silently returns None on a missing/unreadable file.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # Normalize to [0, 1] directly in float32 — avoids the float64
        # intermediate of the original (halves memory for these large
        # images); guard against division by zero on an all-black image.
        max_val = image_array.max()
        image_array = image_array.astype(np.float32) / (max_val if max_val > 0 else 1)
        image_array = torch.from_numpy(image_array)
        # (H, W) -> (3, H, W): replicate the grayscale channel for
        # 3-channel model input.
        image_array = image_array.repeat(3, 1, 1)
        if self.transform:
            image_array = self.transform(image_array)
        labels = self.class_name_to_labels(idx)
        # from_numpy(np.array(...)) keeps the original float64 label dtype.
        labels = torch.from_numpy(np.array(labels))
        return image_array, labels