Hi, at first, I used the following methods to read my training data:
train_df, val_df = train_test_split(train_csv, test_size=0.1, random_state=2018, stratify=train_csv.diagnosis)
train_df.reset_index(drop=True, inplace=True)
val_df.reset_index(drop=True, inplace=True)
def expand_path(p):
p = str(p)
if isfile(train + p + ".png"):
return train + (p + ".png")
if isfile(train_2015 + p + '.png'):
return train_2015 + (p + ".png")
if isfile(test + p + ".png"):
return test + (p + ".png")
return p
def crop_image1(img,tol=7):
# img is image data
# tol is tolerance
mask = img>tol
return img[np.ix_(mask.any(1),mask.any(0))]
def crop_image_from_gray(img,tol=7):
if img.ndim ==2:
mask = img>tol
return img[np.ix_(mask.any(1),mask.any(0))]
elif img.ndim==3:
gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
mask = gray_img>tol
check_shape = img[:,:,0][np.ix_(mask.any(1),mask.any(0))].shape[0]
if (check_shape == 0): # image is too dark so that we crop out everything,
return img # return original image
else:
img1=img[:,:,0][np.ix_(mask.any(1),mask.any(0))]
img2=img[:,:,1][np.ix_(mask.any(1),mask.any(0))]
img3=img[:,:,2][np.ix_(mask.any(1),mask.any(0))]
# print(img1.shape,img2.shape,img3.shape)
img = np.stack([img1,img2,img3],axis=-1)
# print(img.shape)
return img
class MyDataset(Dataset):
def __init__(self, dataframe, transform=None):
self.df = dataframe
self.transform = transform
def __len__(self):
return len(self.df)
def __getitem__(self, idx):
label = self.df.diagnosis.values[idx]
label = np.expand_dims(label, -1)
p = self.df.id_code.values[idx]
p_path = expand_path(p)
image = cv2.imread(p_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = crop_image_from_gray(image)
image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
image = cv2.addWeighted ( image,4, cv2.GaussianBlur( image , (0,0) , 30) ,-4 ,128)
image = transforms.ToPILImage()(image)
if self.transform:
image = self.transform(image)
return image, label
train_transform = transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.RandomRotation((-120, 120)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
trainset = MyDataset(train_df, transform =train_transform)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=4)
valset = MyDataset(val_df, transform =train_transform)
val_loader = torch.utils.data.DataLoader(valset, batch_size=32, shuffle=False, num_workers=4)
I think it should be a conventional way of writing. But in the course of training, I found that maybe it was bottleneck that caused my GPU usage to be not very high, and it took me a long time to train an epoch (about 600 seconds).
So I changed the way I read the data. I want to read all the data at once. The code after the modification is as follows:
train_df, val_df = train_test_split(train_csv, test_size=0.1, random_state=2018, stratify=train_csv.diagnosis)
train_df.reset_index(drop=True, inplace=True)
val_df.reset_index(drop=True, inplace=True)
def expand_path(p):
p = str(p)
if isfile(train + p + ".png"):
return train + (p + ".png")
if isfile(train_2015 + p + '.png'):
return train_2015 + (p + ".png")
if isfile(test + p + ".png"):
return test + (p + ".png")
return p
def crop_image1(img,tol=7):
# img is image data
# tol is tolerance
mask = img>tol
return img[np.ix_(mask.any(1),mask.any(0))]
def crop_image_from_gray(img,tol=7):
if img.ndim ==2:
mask = img>tol
return img[np.ix_(mask.any(1),mask.any(0))]
elif img.ndim==3:
gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
mask = gray_img>tol
check_shape = img[:,:,0][np.ix_(mask.any(1),mask.any(0))].shape[0]
if (check_shape == 0): # image is too dark so that we crop out everything,
return img # return original image
else:
img1=img[:,:,0][np.ix_(mask.any(1),mask.any(0))]
img2=img[:,:,1][np.ix_(mask.any(1),mask.any(0))]
img3=img[:,:,2][np.ix_(mask.any(1),mask.any(0))]
# print(img1.shape,img2.shape,img3.shape)
img = np.stack([img1,img2,img3],axis=-1)
# print(img.shape)
return img
from toolz.itertoolz import partition_all, concatv
from joblib import Parallel, delayed
def prep_one(img, trans):
image = cv2.imread(img)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = crop_image_from_gray(image)
image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
image = cv2.addWeighted(image, 4, cv2.GaussianBlur(image, (0, 0), 30), -4, 128)
image = transforms.ToPILImage()(image)
image = trans(image)
return image.unsqueeze(0)
def preprocess(df, image_path, trans):
labels = [1.0] * len(df)
if 'diagnosis' in df.columns:
labels = df.diagnosis.tolist()
labels = np.expand_dims(labels, -1)
# images = []
# for idx in tqdm(df.id_code.tolist()):
# p_path = image_path + idx + '.png'
# images.append(prep_one(p_path, trans))
parallel = Parallel(2, backend="multiprocessing", verbose=5)
images = parallel(
delayed(prep_one)(image_path + idx + '.png', trans) for idx in df.id_code.tolist()
)
image_tensor = torch.cat(images)
label_tensor = torch.tensor(labels, dtype=torch.float)
del images
gc.collect()
return image_tensor, label_tensor
train_transform = transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.RandomRotation((-120, 120)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
train_x, train_y = preprocess(train_df, train, train_transform)
valid_x, valid_y = preprocess(val_df, train, train_transform)
trainset = torch.utils.data.TensorDataset(train_x, train_y)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)
valset = torch.utils.data.TensorDataset(valid_x, valid_y)
val_loader = torch.utils.data.DataLoader(valset, batch_size=32, shuffle=False)
It only takes about 20 seconds to train an epoch after modification. But I found that the training model was much worse than before. Is there any mistake in this way of reading?