@ptrblck Thank you so much. You helped me to get a better understanding of customised database.
Where are you getting the index from??
The index will load the images from the image path which stored in __init__
@ptrblck I saw a couple of examples for learning-rate scheduling and saving the best model; however, most of them are implemented for transfer learning. I have a hard time generalising them to my own problem. I am following this tutorial for scheduling the learning rate and saving the best model.
I split the previous dataset into three groups (train, validation and test), and here is the code:
from custom_dataset import CustomDataset
import torch
import glob
# Collect all image and mask paths.
# glob returns its matches in arbitrary order, so BOTH lists are sorted here.
# Sorting only the images could pair an image with the wrong mask once the
# lists are sliced by position below.
# Assumes image and mask file names sort into the same order -- TODO confirm.
folder_data = sorted(glob.glob("D:\\Neda\\Pytorch\\U-net\\my_data\\imagesResized\\*.png"))
folder_mask = sorted(glob.glob("D:\\Neda\\Pytorch\\U-net\\my_data\\labelsResized\\*.png"))
if len(folder_data) != len(folder_mask):
    raise ValueError(
        "found {} images but {} masks".format(len(folder_data), len(folder_mask))
    )

# Split the paths 80 % / 10 % / 10 % into train / validation / test.
len_data = len(folder_data)
print("count of dataset: ", len_data)
# count of dataset: 992
split_1 = int(0.8 * len_data)
split_2 = int(0.9 * len_data)

train_image_paths = folder_data[:split_1]
print("count of train images is: ", len(train_image_paths))
# count of train images is: 793
valid_image_paths = folder_data[split_1:split_2]
print("count of validation image is: ", len(valid_image_paths))
# count of validation image is: 99
test_image_paths = folder_data[split_2:]
print("count of test images is: ", len(test_image_paths))
# count of test images is: 100
#print(test_image_paths)

# The masks are sliced at the same positions as the images.
train_mask_paths = folder_mask[:split_1]
valid_mask_paths = folder_mask[split_1:split_2]
test_mask_paths = folder_mask[split_2:]

train_dataset = CustomDataset(train_image_paths, train_mask_paths)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=2)
valid_dataset = CustomDataset(valid_image_paths, valid_mask_paths)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=1, shuffle=True, num_workers=2)
test_dataset = CustomDataset(test_image_paths, test_mask_paths)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=2)

# Phase name -> loader.  NOTE: this dict shares its name with
# torch.utils.data.DataLoader; the class is only reached through its fully
# qualified name in this script, so nothing is masked here.
DataLoader = {
    'train': train_loader,
    'valid': valid_loader,
    #'test': test_loader
}
When I follow the tutorial, I get this error as soon as training starts in the first epoch. Could you please give me some suggestions on how I can fix it? I assume it is related to the DataLoader in the snippet above.
for i, data in DataLoader[phase]:
ValueError: too many values to unpack (expected 2)
Also, I am wondering how I can get dataset_sizes[phase] for epoch_loss = running_loss / dataset_sizes[phase].
if I need to create a new thread please let me know.
Could you check the output of your `Dataset`?
Based on the example code in this thread it looks like two values should be returned, but I’m not sure how your current implementation works.
Just print the length of the returned values:
len(train_dataset[0])
You could get the size of the `Dataset` using this code snippet:
len(dataloaders[phase].dataset)
Also, as you can see, I’ve changed the name from `DataLoader` to `dataloaders`, since `DataLoader` is used by PyTorch and I don’t want to accidentally mask it somehow.
this is current Dataset and it’s returning four values.
import torch
from torch.utils.data.dataset import Dataset # For custom data-sets
import torchvision.transforms as transforms
from PIL import Image
import numpy
import torchvision.transforms.functional
class CustomDataset(Dataset):
    """Segmentation dataset pairing each image with its label mask.

    Each item is the tuple ``(image, mask, image_path, mask_path)``:
    ``image`` is the grayscale picture converted by ``ToTensor`` and ``mask``
    is a ``torch.long`` tensor whose raw pixel values were replaced by class
    indices according to ``self.mapping``.
    """

    def __init__(self, image_paths, target_paths):
        """Store the parallel path lists and build the tensor transform.

        Args:
            image_paths: paths of the input images.
            target_paths: paths of the masks; entry ``i`` belongs to
                ``image_paths[i]``.
        """
        self.image_paths = image_paths
        self.target_paths = target_paths
        self.transforms = transforms.ToTensor()
        # Raw mask pixel value -> class index.
        self.mapping = {
            0: 0,
            255: 1,
        }

    def mask_to_class(self, mask):
        """Return a copy of ``mask`` with pixel values replaced by class indices.

        Values that are not keys of ``self.mapping`` are kept unchanged.
        """
        remapped = mask.clone()
        for pixel_value, class_index in self.mapping.items():
            # Select pixels on the *original* mask: a pixel that was already
            # remapped must not be remapped again when a class index happens
            # to equal another raw pixel value (e.g. {0: 1, 1: 2}).
            remapped[mask == pixel_value] = class_index
        return remapped

    def __getitem__(self, index):
        """Load, convert and return the sample at ``index``."""
        image_path = self.image_paths[index]
        target_path = self.target_paths[index]
        # The context managers close the underlying files once the pixel data
        # has been copied into tensors.
        with Image.open(image_path) as image:
            t_image = self.transforms(image.convert('L'))
        with Image.open(target_path) as mask_image:
            # uint8 conversion used for this (lv) dataset.
            mask = torch.from_numpy(numpy.array(mask_image, dtype=numpy.uint8))
        mask = self.mask_to_class(mask).long()
        return t_image, mask, image_path, target_path

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)
ohhh, thank you for the dataloader.
In that case, you should assign all four returned value to a single one or to four separate ones:
# Unpack the four values the Dataset returns for every batch.
for data, target, data_path, target_path in dataloaders[phase]:
    ...
I think your code should work even if you use the same name, as it seems you didn’t import DataLoader
directly. However, if it’s not too much of an effort, I would rename your dict.
Thanks a lot. It works now, but I think the training process is too slow compared with before, when I didn’t have validation and learning-rate scheduling: 4 epochs with step_size=2 completed in 7 minutes. I will post a new thread for that, as I have some questions about scheduling the learning rate.
@ptrblck I am wondering how can I add a condition to CustomDataset for data augmentation only for few specific input images for training (image_207, image_387, image_502, image_508, image_509, image_520, image_597)
.
This is the CustomDataset snippet, basically, I added self.transformm
to the previous code which posted above. I think I need to add a if condition
in __getitem__
to apply self.transformm
only on image. could you please point me in the right direction.
# Variant of CustomDataset that also receives `transform_images`; __getitem__
# checks it to decide whether the extra augmentation `self.transformm` is
# applied to a sample.
# NOTE(review): the snippet is kept verbatim; its indentation was lost when
# it was pasted, so it does not parse as shown.
class CustomDataset(Dataset):
def __init__(self, image_paths, target_paths, transform_images):
self.image_paths = image_paths
self.target_paths = target_paths
#self.aug = aug
# NOTE(review): `tf` is not imported in this snippet -- presumably
# torchvision.transforms.functional; confirm.
# NOTE(review): tf.rotate(10) and tf.affine(0.2,0.2) are *called* here while
# the list is built; Compose receives their results, not callables.
self.transformm = transforms.Compose([tf.rotate(10),
tf.affine(0.2,0.2)])
# NOTE(review): stored as `self.transform`, but __getitem__ reads
# `self.transforms`, which is not assigned anywhere in this class.
self.transform = transforms.ToTensor()
self.transform_images = transform_images
# Raw mask pixel value -> class index.
self.mapping = {
0: 0,
255: 1
}
# Replaces, in place, every mask value that is a key of self.mapping with the
# mapped class index and returns the same tensor.
def mask_to_class(self, mask):
for k in self.mapping:
mask[mask==k] = self.mapping[k]
return mask
# Loads image and mask for `index` and returns
# (image tensor, long mask tensor, image path, mask path).
def __getitem__(self, index):
image = Image.open(self.image_paths[index])
mask = Image.open(self.target_paths[index])
t_image = image.convert('L')
t_image = self.transforms(t_image)
# NOTE(review): `transform_images` is referenced without `self.`, and the
# membership test runs against `image` (the object returned by Image.open),
# not against the path string self.image_paths[index].
if any([img in image for img in transform_images]):
t_image = self.transformm(t_image)
mask = torch.from_numpy(numpy.array(mask, dtype=numpy.uint8))
mask = self.mask_to_class(mask)
mask = mask.long()
return t_image, mask, self.image_paths[index], self.target_paths[index]
def __len__(self): # return count of sample we have
return len(self.image_paths)
A simple approach would be to get the indices of these particular images and add a condition before applying the transformation.
Alternatively you could also check against self.image_paths
:
def __init__(self, ..., transform_images):
self.transform_images = transform_images
...
def __getitem__(self, index):
...
if any([img in path for img in transform_images]):
t_image = self.transformm(t_image)
...
@ptrblck thank you. I still couldn’t manage do this. Could you please explain to me what is self.transform_images
in __init__
? where and how should I specify the indices of images (example image_207, image_208) that needs to be transform if I want to do the alternative solution?
self.transform_images
would be a list containing all image names which should be transformed.
In __getitem__
the current image path will be checked against all image names in self.transform_images
(the self.
is missing in my code snippet).
I’m not sure, how you are creating the image paths, but once you get all the paths, you could use the .index()
method on this list or alternatively use a condition to get the image indices.
@ptrblck I have been trying to do augmentation with no luck.
This is the snippet for custom_dataset
# CustomDataset attempt with per-image augmentation: samples selected through
# `self.transform_images` are additionally passed through `self.transformm`.
# NOTE(review): the snippet is kept verbatim; its indentation was lost when
# it was pasted, so it does not parse as shown.
class CustomDataset(Dataset):
def __init__(self, image_paths, target_paths, transform_images):
self.image_paths = image_paths
self.target_paths = target_paths
# NOTE(review): `tf` is not imported in this snippet -- presumably
# torchvision.transforms.functional; confirm.
# NOTE(review): tf.rotate(10) and tf.affine(0.2,0.2) are *called* here while
# the list is built; Compose receives their results, not callables.
self.transformm = transforms.Compose([tf.rotate(10),
tf.affine(0.2,0.2)])
# NOTE(review): stored as `self.transform`, but __getitem__ reads
# `self.transforms`, which is not assigned anywhere in this class.
self.transform = transforms.ToTensor()
self.transform_images = transform_images
# Raw mask pixel value -> class index.
self.mapping = {
0: 0,
255: 1
}
# Replaces, in place, every mask value that is a key of self.mapping with the
# mapped class index and returns the same tensor.
def mask_to_class(self, mask):
for k in self.mapping:
mask[mask==k] = self.mapping[k]
return mask
# Loads image and mask for `index` and returns
# (image tensor, long mask tensor, image path, mask path).
def __getitem__(self, index):
image = Image.open(self.image_paths[index])
mask = Image.open(self.target_paths[index])
t_image = image.convert('L')
t_image = self.transforms(t_image) # transform to tensor
# NOTE(review): the membership test runs against `image` (the object returned
# by Image.open), not against the path string self.image_paths[index].
# NOTE(review): only the image is augmented here; the mask is left as loaded.
if any([img in image for img in self.transform_images]):
t_image = self.transformm(t_image) #augmentation
mask = torch.from_numpy(numpy.array(mask, dtype=numpy.uint8))
mask = self.mask_to_class(mask)
mask = mask.long()
return t_image, mask, self.image_paths[index], self.target_paths[index]
def __len__(self):
return len(self.image_paths)
and here is the snippet for splitting the dataset and defining the dataloaders:
from custom_dataset import CustomDataset
folder_data = glob.glob("F:\\my_data\\imagesResized\\*.png")
folder_mask = glob.glob("F:\\my_data\\labelsResized\\*.png")
# Every image needs exactly one mask.  Check this before anything is sliced,
# and with a real exception: an assert is skipped under `python -O`.
if len(folder_data) != len(folder_mask):
    raise ValueError(
        "found {} images but {} masks".format(len(folder_data), len(folder_mask))
    )
# Sort by (length, name).  Sorting by length alone leaves paths of equal
# length in whatever arbitrary order glob returned them, so images and masks
# could end up paired differently from run to run.
# Assumes image and mask names sort into the same order -- TODO confirm.
folder_data.sort(key=lambda path: (len(path), path))
folder_mask.sort(key=lambda path: (len(path), path))
#print(folder_data)
len_data = len(folder_data)
print("count of dataset: ", len_data)
print(80 * '_')

# Split boundaries: ~60 % train, ~20 % validation, remainder test.
# For 992 files split_2 is 794, the value that used to be hard-coded.
split_1 = int(0.6 * len_data)
split_2 = min(int(0.8 * len_data + 1), len_data)

# The test set is the tail of the sorted list and is never shuffled.
test_image_paths = folder_data[split_2:]
print("count of test images is: ", len(test_image_paths))
test_mask_paths = folder_mask[split_2:]
print("count of test mask is: ", len(test_mask_paths))

# Shuffle ONLY the indices in front of the test slice.  Shuffling all indices
# would draw train/validation samples from the test slice as well and leak
# test data into training.
indices = list(range(split_2))
#print(indices)
random.shuffle(indices)
#print(indices)
# One shared permutation keeps every image next to its own mask.
image_indices = [folder_data[i] for i in indices]
mask_indices = [folder_mask[i] for i in indices]
#print(mask_indices)

train_image_paths = image_indices[:split_1]
print("count of training images is: ", len(train_image_paths))
train_mask_paths = mask_indices[:split_1]
print("count of training mask is: ", len(train_mask_paths))
valid_image_paths = image_indices[split_1:]
print("count of validation image is: ", len(valid_image_paths))
valid_mask_paths = mask_indices[split_1:]
print("count of validation mask is: ", len(valid_mask_paths))
#print(valid_mask_paths)
print(80* '_')
print(valid_image_paths)

# Images that receive the extra augmentation (currently a single file).
transform_images = glob.glob("F:\\my_data\\imagesResized\\P164_ES_1.png")
# Names that are meant to be augmented eventually:
#   'P164_ES_1', 'P164_ES_2', 'P164_ES_3', 'P165_ED_1',
#   'P165_ED_2', 'P165_ED_3', 'P165_ES_1', 'P165_ES_2', 'P165_ES_3',
#   'P166_ED_1', 'P166_ED_2'

train_dataset = CustomDataset(train_image_paths, train_mask_paths, transform_images)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=2)
valid_dataset = CustomDataset(valid_image_paths, valid_mask_paths, transform_images)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=1, shuffle=True, num_workers=2)
test_dataset = CustomDataset(test_image_paths, test_mask_paths, transform_images)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=2)
# Phase name -> loader.
dataLoaders = {
    'train': train_loader,
    'valid': valid_loader,
    'test': test_loader,
}
in the second snippet transform_images = list(folder_data['P164_ES_1', 'P164_ES_2','P164_ES_3','P165_ED_1', 'P165_ED_2', 'P165_ED_3', 'P165_ES_1', 'P165_ES_2','P165_ES_3', 'P166_ED_1', 'P166_ED_2'])
is list of image name that I need to augment.
I would really appreciate it if you could point me in the right direction. At the moment it is giving the error __init__() takes 3 positional arguments but 4 were given
I also don’t have any idea how can I do it for mask of images (my task is segmentation)
It seems you are passing the torchvision.transforms.functional
methods to transforms.Compose
instead of the classes.
In case you would like to use the functional API, you could just apply these methods in your __getitem__
method:
if any([img in image for img in self.transform_images]):
t_image = tf.rotate(t_image, 10)
t_image = tf.affine(t_image, 0, (0, 0), 0.2, 0.2)
Or alternatively wrap them in `transforms.Lambda`:
transform = transforms.Compose([
transforms.Lambda(lambda x: TF.rotate(x, 10)),
transforms.Lambda(lambda x: TF.affine(x,
angle=0,
translate=(0, 0),
scale=0.2,
shear=0.2))
])
I just assumed, you would like to set the scale
and shear
argument to 0.2
. If that’s not correct, you should of course correct my code.
@ptrblck Thanks a lot. Also, my main problem is apply augmentation on specific indices. In the second above snippet, which is splitting the dataset and define the dataloader do I need to make a list of indices which need to be augment? Is there any example for this?
Try to pass a list containing all images names which should be transformed.
In __getitem__
you should compare the current image_path
not the image itself.
Here is a simple example for a folder containing 5 images:
class CustomDataset(Dataset):
    """Minimal example: apply an extra transformation to selected images only.

    An image is selected when any entry of ``transform_images`` occurs as a
    substring of its path.  Each item is ``(image_tensor, image_path)``.
    """

    def __init__(self, image_paths, transform_images):
        """Store the paths and build the special transformation.

        Args:
            image_paths: paths of all images in the dataset.
            transform_images: names (path substrings) of the images that
                should receive the special transformation.
        """
        self.image_paths = image_paths
        self.transform_images = transform_images
        self.transformm = transforms.Lambda(
            lambda x: TF.affine(x,
                                angle=0,
                                translate=(0, 0),
                                scale=0.2,
                                shear=0.2))

    def __getitem__(self, index):
        """Load the image at ``index``, augment it if selected, return a tensor."""
        image_path = self.image_paths[index]
        # The context manager closes the file once the pixels are in a tensor.
        with Image.open(image_path) as loaded:
            image = loaded
            # Compare against the *path*, not the image object.
            if any(name in image_path for name in self.transform_images):
                print('applying special transformation')
                image = self.transformm(image)  # augmentation
            image = TF.to_tensor(image)
        return image, image_path

    def __len__(self):
        """Return the number of images."""
        return len(self.image_paths)
# Usage example; lines starting with "# >" show the printed output.
image_paths = glob.glob('./data/fake_folders/class0/*.png')
print(image_paths)
# > ['./data/fake_folders/class0/0.png', './data/fake_folders/class0/1.png', './data/fake_folders/class0/3.png', './data/fake_folders/class0/2.png', './data/fake_folders/class0/4.png']
transform_images = ['0.png', '1.png']  # apply special transformation only on first two images
print(transform_images)
# > ['0.png', '1.png']
dataset = CustomDataset(image_paths, transform_images)
for data, path in dataset:
    print(path)
# > applying special transformation
# ./data/fake_folders/class0/0.png
# applying special transformation
# ./data/fake_folders/class0/1.png
# ./data/fake_folders/class0/3.png
# ./data/fake_folders/class0/2.png
# ./data/fake_folders/class0/4.png
Thank you very much @ptrblck.
As I am doing segmentation, I did repeat the same thing for masks augmentation as well. Is it correct?
Could you please let me know how can I display some transform images and correspond mask and where these augmented images and mask stored? or is this transformation happening during training?
Here is the script:
import torch
from torch.utils.data.dataset import Dataset # For custom data-sets
import torchvision.transforms as transforms
import torchvision.transforms.functional as tf
from PIL import Image
import numpy
import glob
import matplotlib.pyplot as plt
from split_dataset import test_loader
import os
# CustomDataset for segmentation with a fixed augmentation (rotate by 10, then
# affine with scale=0.2 and shear=0.2) applied to samples whose paths contain
# an entry of `transform_images` / `transform_masks`.
# NOTE(review): the snippet is kept verbatim; its indentation was lost when
# it was pasted, so it does not parse as shown.
class CustomDataset(Dataset):
def __init__(self, image_paths, target_paths, transform_images, transform_masks):
self.image_paths = image_paths
self.target_paths = target_paths
# Name fragments selecting which images / masks get augmented.
self.transform_images = transform_images
self.transform_masks = transform_masks
# The functional calls are wrapped in Lambda, so Compose receives callables.
self.transformm = transforms.Compose([transforms.Lambda(lambda x: tf.rotate(x, 10)),
transforms.Lambda(lambda x: tf.affine(x, angle=0,
translate=(0, 0),
scale=0.2,
shear=0.2))
])
self.transform = transforms.ToTensor()
# Raw mask pixel value -> class index.
self.mapping = {
0: 0,
255: 1
}
# Replaces, in place, every mask value that is a key of self.mapping with the
# mapped class index and returns the same tensor.
def mask_to_class(self, mask):
for k in self.mapping:
mask[mask==k] = self.mapping[k]
return mask
# Loads image and mask for `index` and returns
# (image tensor, long mask tensor, image path, mask path).
def __getitem__(self, index):
image = Image.open(self.image_paths[index])
mask = Image.open(self.target_paths[index])
# Selection is by substring match against the path of the current sample.
if any([img in self.image_paths[index] for img in self.transform_images]):
print('applying special transformation')
image = self.transformm(image) #augmentation
if any([msk in self.target_paths[index] for msk in self.transform_masks]):
print('applying special transformation')
# NOTE(review): the transformed mask is assigned to `image`; this replaces the
# image loaded above and leaves `mask` itself untransformed.
image = self.transformm(mask) #augmentation
t_image = image.convert('L')
t_image = self.transform(t_image) # transform to tensor for image
# NOTE(review): the mask goes through ToTensor before the uint8 conversion
# below; ToTensor presumably rescales values to [0, 1], which would break the
# 0/255 lookup in mask_to_class -- confirm.
mask = self.transform(mask) # transform to tensor for mask
mask = torch.from_numpy(numpy.array(mask, dtype=numpy.uint8))
mask = self.mask_to_class(mask)
mask = mask.long()
return t_image, mask, self.image_paths[index], self.target_paths[index]
def __len__(self): # return count of sample we have
return len(self.image_paths)
# Builds the dataset and tries to display augmented images next to their masks.
# NOTE(review): the snippet is kept verbatim; the indentation of the loop body
# was lost, so which statements belong to the `for` cannot be told from here.
# glob.glob returns lists of paths.
image_paths = glob.glob("D:\\Neda\\Pytorch\\U-net\\my_data\\imagesResized\\*.png")
target_paths = glob.glob("D:\\Neda\\Pytorch\\U-net\\my_data\\labelsResized\\*.png")
transform_images = ['image_981.png', 'image_982.png','image_983.png', 'image_984.png', 'image_985.png',
'image_986.png','image_987.png','image_988.png','image_989.png','image_990.png',
'image_991.png'] # apply special transformation only on these images
print(transform_images)
#['image_991.png', 'image_991.png']
transform_masks = ['image_labeled_981.png', 'image_labeled_982.png','image_labeled_983.png', 'image_labeled_984.png',
'image_labeled_985.png', 'image_labeled_986.png','image_labeled_987.png','image_labeled_988.png',
'image_labeled_989.png','image_labeled_990.png',
'image_labeled_991.png']
dataset = CustomDataset(image_paths, target_paths, transform_images, transform_masks)
# NOTE(review): the loop variable reuses the name `transform_images`, so the
# list above is rebound to each dataset item (the 4-tuple returned by
# CustomDataset.__getitem__ in this snippet).
for transform_images in dataset:
#print(transform_images)
# NOTE(review): os.path.join receives `image_paths` (the list from glob.glob)
# and the dataset item (a tuple), not path strings.
transform_images = Image.open(os.path.join(image_paths, transform_images))
transform_images = numpy.array(transform_images)
# NOTE(review): same pattern for the masks: a list joined with a list.
transform_masks = Image.open(os.path.join(target_paths, transform_masks))
transform_masks = numpy.array(transform_masks)
# NOTE(review): nrows=1, ncols=1 requests a single subplot, yet two axes are
# unpacked -- verify (ncols=2 was presumably intended).
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=1, sharex=True, sharey=True, figsize = (6,6))
img1 = ax1.imshow(transform_images, cmap='gray')
ax1.axis('off')
img2 = ax2.imshow(transform_masks)
# NOTE(review): ax1.axis('off') is called a second time; ax2 is never switched off.
ax1.axis('off')
plt.show()
it is causing an error path = os.fspath(path) TypeError: expected str, bytes or os.PathLike object, not tuple
. Also, I tried with cv2 and another error which is about float image
Another question is since I did change now CustomDataset do I need to change the way I was loading my dataloader for training?
Since self.transformm
does not contain any random transformations, you can apply it on your image and mask without a problem.
Note that you have a small error in your code, i.e. you are assigning the transformed mask
to image
instead of mask
:
if any([msk in self.target_paths[index] for msk in self.transform_masks]):
print('applying special transformation')
image = self.transformm(mask) #augmentation
The transformation is applied on the fly each time __getitem__
is called.
If you want to visualize some examples, you could either try to visualize the already transformed tensors:
x, y = dataset[0]
# "denormalize" if necessary
image, mask = TF.to_pil_image(x), TF.to_pil_image(y)
or alternatively if you need to visualize a lot of examples, you could also write another class method just for the augmentation:
class CustomDataset(Dataset):
    """Skeleton: keep loading/augmentation separate from tensor conversion.

    ``get_sample`` returns the augmented but not yet tensorised pair, so it
    can be called directly for visualisation; ``__getitem__`` builds on it
    for training.
    """

    def __init__(self):
        pass

    def get_sample(self, index):
        """Return the loaded and augmented ``(image, mask)`` pair (placeholder)."""
        # your loading and augmentation code
        return image, mask

    def __getitem__(self, index):
        """Return the sample at ``index`` converted for training."""
        image, mask = self.get_sample(index)
        # Transform to tensor
        image = self.transform(image)
        mask = ...
        # Without this return every item would be None.
        return image, mask

    def __len__(self):
        ...
image, mask = dataset.get_sample(0)
Note that you might have another error in your code, since you are applying ToTensor
on your mask.
Since your mask should contain class indices, this would normalize the values to the range [0, 1]
, which might destroy it.
Could you check the values of your mask and make sure all pixel values contain a valid class index?
Regarding the last error message: you are trying to pass a complete list of file names to Image.open
.
This method only loads a single image, so you should use a loop if you want to load multiple images.
@ptrblck I really appreciate your help. You are right. I shouldn’t do transform on mask. Then if I remove that line mask = self.transform(mask) # transform to tensor for mask
would it be fine?
Even if I don’t apply the transform to the mask, the mask is all zeros. No idea why! Maybe I am doing something wrong. Also, the transformation on the image destroyed the image as well.
here is the code:
import torch
from torch.utils.data.dataset import Dataset # For custom data-sets
import torchvision.transforms as transforms
import torchvision.transforms.functional as tf
from PIL import Image
import numpy
import glob
import matplotlib.pyplot as plt
from split_dataset import test_loader
class CustomDataset(Dataset):
    """Segmentation dataset with a fixed augmentation for selected samples.

    An image (mask) is augmented when any entry of ``transform_images``
    (``transform_masks``) occurs as a substring of its path.  Each item is
    ``(image, mask, image_path, mask_path)``: ``image`` is the grayscale
    picture converted by ``ToTensor``, ``mask`` a ``torch.long`` tensor of
    class indices.

    NOTE: images and masks are selected through two independent lists; list
    both files of a pair, otherwise only one of them is augmented and the
    pair no longer lines up.
    """

    def __init__(self, image_paths, target_paths, transform_images, transform_masks):
        """Store paths and selection lists; build the transforms.

        Args:
            image_paths: paths of the input images.
            target_paths: paths of the masks; entry ``i`` belongs to
                ``image_paths[i]``.
            transform_images: name fragments of the images to augment.
            transform_masks: name fragments of the masks to augment.
        """
        self.image_paths = image_paths
        self.target_paths = target_paths
        self.transform_images = transform_images
        self.transform_masks = transform_masks
        # Deterministic augmentation (no random parameters), so applying it
        # separately to image and mask yields the same geometry for both.
        # NOTE: in torchvision's affine, `scale` is the overall scale factor;
        # 0.2 shrinks the content to a fifth of its size (1.0 keeps it).
        # Adjust if only a mild augmentation is intended.
        self.transformm = transforms.Compose([
            transforms.Lambda(lambda x: tf.rotate(x, 10)),
            transforms.Lambda(lambda x: tf.affine(x, angle=0,
                                                  translate=(0, 0),
                                                  scale=0.2,
                                                  shear=0.2)),
        ])
        self.transform = transforms.ToTensor()
        # Raw mask pixel value -> class index.
        self.mapping = {
            0: 0,
            255: 1,
        }

    def mask_to_class(self, mask):
        """Return a copy of ``mask`` with pixel values replaced by class indices.

        Values that are not keys of ``self.mapping`` are kept unchanged.
        """
        remapped = mask.clone()
        for pixel_value, class_index in self.mapping.items():
            # Select pixels on the *original* mask so that an already remapped
            # pixel is never remapped a second time.
            remapped[mask == pixel_value] = class_index
        return remapped

    def __getitem__(self, index):
        """Load, optionally augment, and convert the sample at ``index``."""
        image_path = self.image_paths[index]
        target_path = self.target_paths[index]
        # The context managers close both files once the data is in tensors.
        with Image.open(image_path) as image_file, Image.open(target_path) as mask_file:
            image, mask = image_file, mask_file
            if any(name in image_path for name in self.transform_images):
                print('applying special transformation on images')
                image = self.transformm(image)  # augmentation
            if any(name in target_path for name in self.transform_masks):
                print('applying special transformation on masks')
                mask = self.transformm(mask)  # augmentation
            t_image = self.transform(image.convert('L'))  # image -> tensor
            # The mask deliberately skips ToTensor so its raw 0/255 values
            # survive for the class lookup below.
            mask = torch.from_numpy(numpy.array(mask, dtype=numpy.uint8))
        mask = self.mask_to_class(mask).long()
        return t_image, mask, image_path, target_path

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)
import os  # local to this script part: only needed for the name lookup below

# glob returns matches in arbitrary order.  Sort both lists by (length, name)
# so that image i and mask i belong together.
# Assumes image and mask names sort into the same order -- TODO confirm.
image_paths = sorted(
    glob.glob("D:\\Neda\\Pytorch\\U-net\\my_data\\imagesResized\\*.png"),
    key=lambda path: (len(path), path))
target_paths = sorted(
    glob.glob("D:\\Neda\\Pytorch\\U-net\\my_data\\labelsResized\\*.png"),
    key=lambda path: (len(path), path))
transform_images = ['image_981.png', 'image_982.png','image_983.png', 'image_984.png', 'image_985.png',
                    'image_986.png','image_987.png','image_988.png','image_989.png','image_990.png',
                    'image_991.png'] # apply special transformation only on these images
print(transform_images)
transform_masks = ['image_labeled_981.png', 'image_labeled_982.png','image_labeled_983.png', 'image_labeled_984.png',
                   'image_labeled_985.png', 'image_labeled_986.png','image_labeled_987.png','image_labeled_988.png',
                   'image_labeled_989.png','image_labeled_990.png',
                   'image_labeled_991.png']
dataset = CustomDataset(image_paths, target_paths, transform_images, transform_masks)

# Look the sample up by file name; position 981 of the path list is not
# guaranteed to be image_981.png.
sample_name = 'image_981.png'
sample_index = next(
    (i for i, path in enumerate(image_paths) if os.path.basename(path) == sample_name),
    None)
if sample_index is None:
    raise FileNotFoundError(sample_name + " not found among the globbed images")
image, mask, ti, tm = dataset[sample_index]
layer, height, width = (image.size())
print(layer, height, width)

# Do NOT re-create `image` / `mask` with torch.FloatTensor(1, height, width)
# here: that allocates new, uninitialised tensors and throws the loaded
# sample away, so arbitrary memory would be displayed.
image = tf.to_pil_image(image) # float tensor in [0, 1] -> PIL image
# The mask holds class indices (0/1) as int64; scale to 0/255 and cast to
# uint8 so it can be converted and is visible when plotted.
# Assumes a single-channel (H, W) mask -- TODO confirm.
mask = tf.to_pil_image((mask * 255).clamp(0, 255).to(torch.uint8).unsqueeze(0))
min_img, max_img = image.getextrema()
print(min_img, max_img)
min_msk, max_msk = mask.getextrema()
print(min_msk, max_msk)
image = numpy.array(image) / 255
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharex=True, sharey=True, figsize = (6,6))
img1 = ax1.imshow(image, cmap='gray')
ax1.axis('on')
img2 = ax2.imshow(mask)
ax2.axis('on')
plt.show()
here is what it produced: