As far as I understand, RandomCrop performs a random crop of the given size for each sample/batch. Is there a way to restrict the crop so that it is always taken from the same location?
We have torchvision.transforms.CenterCrop. Does that help you?
It always gives a crop around the center, right? So if all images/tensors are of the same size, the crop location will be the same.
@InnovArul I will try to clarify exactly what I want.
It is OK to have RandomCrop in my case, but I want the random position to change every 2nd batch:
so for batch 1 the crop is taken from position (x, y), and for batch 2 from the same position (x, y), but batches 3 and 4 will use a different random position, and so on.
I know this is something really specific to my case, but is there any way to achieve it?
You could use a counter to choose whether to resample the random crop parameters or reuse them.
Here is a small (untested) example:
import random

from PIL import Image
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset


class MyDataset(Dataset):
    def __init__(self, image_paths):
        self.image_paths = image_paths
        self.crop_indices = []

    def transform(self, image, resample):
        # Resize
        resize = transforms.Resize(size=(520, 520))
        image = resize(image)

        # Random crop: resample the crop position only when requested,
        # otherwise reuse the previously stored indices
        if resample:
            self.crop_indices = transforms.RandomCrop.get_params(
                image, output_size=(512, 512))
        i, j, h, w = self.crop_indices
        image = TF.crop(image, i, j, h, w)

        # Random horizontal flipping
        if random.random() > 0.5:
            image = TF.hflip(image)

        # Random vertical flipping
        if random.random() > 0.5:
            image = TF.vflip(image)

        # Transform to tensor
        image = TF.to_tensor(image)
        return image

    def __getitem__(self, index):
        image = Image.open(self.image_paths[index])
        resample = index % 2 == 0
        x = self.transform(image, resample)
        return x

    def __len__(self):
        return len(self.image_paths)
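For completeness, here is a rough usage sketch (the image paths are just placeholders); the pairing relies on consecutive indices, so it assumes shuffle=False and num_workers=0, i.e. a single process fetching the samples in order:

from torch.utils.data import DataLoader

# Hypothetical paths; every two consecutive samples share the crop position
dataset = MyDataset(['img0.png', 'img1.png', 'img2.png', 'img3.png'])
loader = DataLoader(dataset, batch_size=2, shuffle=False, num_workers=0)

for batch in loader:
    print(batch.shape)  # e.g. torch.Size([2, 3, 512, 512])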
Let me know, if that works for you.
Hello @ptrblck,
sorry for my late reply.
Can RandomCrop work on tensors rather than images?
I will try to explain why. I have something like this:
def get_data_loader_folder(input_folder, batch_size, train, new_size=None,
                           height=256, width=256, num_workers=4, crop=True):
    transform_list = [transforms.ToTensor(),
                      transforms.Normalize((0.5, 0.5, 0.5),
                                           (0.5, 0.5, 0.5))]
    transform_list = [transforms.RandomCrop((height, width))] + transform_list if crop else transform_list
    transform_list = [transforms.Resize((256, 256))] + transform_list if new_size is not None else transform_list
    transform_list = [transforms.RandomHorizontalFlip()] + transform_list if train else transform_list
    transform = transforms.Compose(transform_list)
    dataset = ImageFolder(input_folder, transform=transform)
    loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=train, drop_last=True, num_workers=num_workers)
    return loader
class ImageFolder(data.Dataset):
    def __init__(self, root, transform=None, return_paths=False,
                 loader=default_loader):
        #imgs = sorted(make_dataset(root))
        # shuffle implicit pairs
        if "test" in root:
            imgs = sorted(make_dataset(root))
        else:
            imgs = shuffle_pairs(sorted(make_dataset(root)))
        if len(imgs) == 0:
            raise(RuntimeError("Found 0 images in: " + root + "\n"
                               "Supported image extensions are: " +
                               ",".join(IMG_EXTENSIONS)))
        self.root = root
        self.imgs = imgs
        self.transform = transform
        self.return_paths = return_paths
        self.loader = loader

    def __getitem__(self, index):
        path = self.imgs[index]
        img = self.loader(path)
        if self.transform is not None:
            img = self.transform(img)
        if self.return_paths:
            return img, path
        else:
            return img

    def __len__(self):
        return len(self.imgs)
So when I call get_data_loader_folder(), I set crop=False, because I don't want RandomCrop in all batches; as I told you before, I want to crop at the same location for every 2 sequential batches.
In the class ImageFolder(), here is what happens: it applies all the transformations composed in transform_list and then returns a tensor with the transformations applied … back again to the RandomCrop:
Is there a way to apply it here?
def __getitem__(self, index):
    path = self.imgs[index]
    img = self.loader(path)
    if self.transform is not None:
        img = self.transform(img)
    ## RANDOM CROP ##
    if self.return_paths:
        return img, path
Taking into consideration that the img returned from self.transform is now a tensor … and if not? Do you have any tips or hints on how to achieve what I want in this case?
Thanks a lot … your help is totally appreciated.
Hey @ptrblck, sorry to disturb. Any help here?
Hi Mostafa,
sorry for the late reply. I've seen the thread but completely forgot to answer.
RandomCrop works on images, and your current code should also work, since you put all image transformations before ToTensor. So if no image transformations are given, your data will just be transformed to a tensor and normalized; otherwise it will be cropped randomly etc. before the ToTensor transform.
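For illustration, a minimal sketch of that ordering (the sizes are just placeholders): PIL-based transforms come first, ToTensor and Normalize come last.

from torchvision import transforms

# PIL-based image transforms first, tensor transforms last
transform = transforms.Compose([
    transforms.RandomCrop((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])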
Your code makes sense and I would like to stick to it.
Therefore we would need to create our own RandomCrop
class.
We can derive from transforms.RandomCrop
and just add the “counter” from my previous example so that the crop indices will be resampled in every second iteration:
import torchvision.transforms as transforms
import torchvision.transforms.functional as F


class MyRandomCrop(transforms.RandomCrop):
    def __init__(self, size, padding=0, pad_if_needed=False):
        super(MyRandomCrop, self).__init__(size, padding, pad_if_needed)
        self.counter = 0
        self.crop_indices = []

    def __call__(self, img):
        if self.padding is not None:
            img = F.pad(img, self.padding, self.fill, self.padding_mode)

        # pad the width if needed
        if self.pad_if_needed and img.size[0] < self.size[1]:
            img = F.pad(img, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode)
        # pad the height if needed
        if self.pad_if_needed and img.size[1] < self.size[0]:
            img = F.pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)

        # resample the crop indices only every second call, otherwise reuse them
        resample = self.counter % 2 == 0
        self.counter += 1
        if resample:
            self.crop_indices = self.get_params(img, self.size)
        i, j, h, w = self.crop_indices
        return F.crop(img, i, j, h, w)


transform = transforms.Compose([
    MyRandomCrop((10, 10)),
    transforms.ToTensor()
])
class MyDataset(Dataset):
    def __init__(self, image_paths, transform):
        self.image_paths = image_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.image_paths[index])
        x = self.transform(image)
        return x

    def __len__(self):
        return len(self.image_paths)
Depending on the torchvision version you are using, you might need to adapt the arguments to RandomCrop.
Let me know if that works for you!
No problem @ptrblck.
So you're saying I should create this class on my own, and then do something like this?
def get_data_loader_folder(input_folder, batch_size, train, new_size=None,
                           height=256, width=256, num_workers=4, crop=True):
    transform_list = [transforms.ToTensor(),
                      transforms.Normalize((0.5, 0.5, 0.5),
                                           (0.5, 0.5, 0.5))]
    transform_list = [MyRandomCrop((height, width))] + transform_list if crop else transform_list
    transform_list = [transforms.Resize((256, 256))] + transform_list if new_size is not None else transform_list
    transform_list = [transforms.RandomHorizontalFlip()] + transform_list if train else transform_list
    transform = transforms.Compose(transform_list)
    dataset = ImageFolder(input_folder, transform=transform)
    loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=train, drop_last=True, num_workers=num_workers)
    return loader
Correct me if I am wrong.
Exactly! Let me know if you get any errors.
But in the arguments, do I need to pass width and height as I am doing?
Yes, you are using it right.
I used the current master branch of torchvision and tried to post code for 0.2.1, forgetting about some fixes.
Here is the fixed code:
import torchvision.transforms.functional as TF


class MyRandomCrop(transforms.RandomCrop):
    def __init__(self, size, padding=0, pad_if_needed=False):
        super(MyRandomCrop, self).__init__(size, padding, pad_if_needed)
        self.counter = 0
        self.crop_indices = []

    def __call__(self, img):
        if self.padding > 0:
            img = TF.pad(img, self.padding)

        # pad the width if needed
        if self.pad_if_needed and img.size[0] < self.size[1]:
            img = TF.pad(img, (int((1 + self.size[1] - img.size[0]) / 2), 0))
        # pad the height if needed
        if self.pad_if_needed and img.size[1] < self.size[0]:
            img = TF.pad(img, (0, int((1 + self.size[0] - img.size[1]) / 2)))

        # resample the crop indices only every second call, otherwise reuse them
        resample = self.counter % 2 == 0
        self.counter += 1
        if resample:
            self.crop_indices = self.get_params(img, self.size)
        i, j, h, w = self.crop_indices
        print('Using {} {} {} {}'.format(i, j, h, w))
        return TF.crop(img, i, j, h, w)
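As a quick sanity check (a rough sketch using a dummy 600x600 image), the printed indices should now repeat in pairs:

import torch

img = transforms.ToPILImage()(torch.randn(3, 600, 600))
crop = MyRandomCrop((256, 256))
for _ in range(6):
    crop(img)  # the printed "Using i j h w" should change every 2nd call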
Let me know, if that works now!
I tried it, and it always gives the same (i, j):
Using 0 0 256 256
Using 0 0 256 256
Using 0 0 256 256
Using 0 0 256 256
… (the same "Using 0 0 256 256" line repeated for every sample)
This is how I have done it:
def get_data_loader_folder(input_folder, batch_size, train, new_size=None,
                           height=256, width=256, num_workers=4, crop=True):
    transform_list = [transforms.ToTensor(),
                      transforms.Normalize((0.5, 0.5, 0.5),
                                           (0.5, 0.5, 0.5))]
    #transform_list = [transforms.RandomCrop((height, width))] + transform_list if crop else transform_list
    transform_list = [MyRandomCrop((height, width))] + transform_list
    transform_list = [transforms.Resize((256, 256))] + transform_list if new_size is not None else transform_list
    transform_list = [transforms.RandomHorizontalFlip()] + transform_list if train else transform_list
    transform = transforms.Compose(transform_list)
    dataset = ImageFolder(input_folder, transform=transform)
    loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=train, drop_last=True, num_workers=num_workers)
    return loader
class ImageFolder(data.Dataset):
    def __init__(self, root, transform=None, return_paths=False,
                 loader=default_loader):
        #imgs = sorted(make_dataset(root))
        # shuffle implicit pairs
        if "test" in root:
            imgs = sorted(make_dataset(root))
        else:
            imgs = shuffle_pairs(sorted(make_dataset(root)))
        if len(imgs) == 0:
            raise(RuntimeError("Found 0 images in: " + root + "\n"
                               "Supported image extensions are: " +
                               ",".join(IMG_EXTENSIONS)))
        self.root = root
        self.imgs = imgs
        self.transform = transform
        self.return_paths = return_paths
        self.loader = loader

    def __getitem__(self, index):
        path = self.imgs[index]
        img = self.loader(path)
        if self.transform is not None:
            img.save('x_a1.png')
            img = self.transform(img)
            vutils.save_image(img, 'x_a2.png', nrow=1)
            #exit()
        if self.return_paths:
            return img, path
        else:
            return img

    def __len__(self):
        return len(self.imgs)
# RandomCrop modified
class MyRandomCrop(transforms.RandomCrop):
    def __init__(self, size, padding=0, pad_if_needed=False):
        super(MyRandomCrop, self).__init__(size, padding, pad_if_needed)
        self.counter = 0
        self.crop_indices = []

    def __call__(self, img):
        if self.padding > 0:
            img = TF.pad(img, self.padding)
        # pad the width if needed
        if self.pad_if_needed and img.size[0] < self.size[1]:
            img = TF.pad(img, (int((1 + self.size[1] - img.size[0]) / 2), 0))
        # pad the height if needed
        if self.pad_if_needed and img.size[1] < self.size[0]:
            img = TF.pad(img, (0, int((1 + self.size[0] - img.size[1]) / 2)))
        resample = self.counter % 2 == 0
        self.counter += 1
        if resample:
            self.crop_indices = self.get_params(img, self.size)
        i, j, h, w = self.crop_indices
        print('Using {} {} {} {}'.format(i, j, h, w))
        return TF.crop(img, i, j, h, w)
What is your image size? The random crop size should be smaller than the image size.
Also, the Resize transformation won't do anything if your crops are already the same size.
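For instance (a quick check with a dummy image as a placeholder): if the image already has exactly the crop size, the only possible top-left corner is (0, 0), so the crop position can never vary:

import torch
from torchvision import transforms

img = transforms.ToPILImage()(torch.randn(3, 256, 256))
# If the image size equals the crop size, (0, 0) is the only possible offset
print(transforms.RandomCrop.get_params(img, (256, 256)))  # -> (0, 0, 256, 256)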
I am loading 2 datasets: one has size (400, 300) and the other has size (800, 600).
Concerning the resizing, yes, for sure it won't do anything; I know this, it can be ignored.
That’s strange. I’m testing it with images of size [3, 690, 334]
and it seems to work:
Using 34 337 256 256
Using 34 337 256 256
Using 72 102 256 256
Using 72 102 256 256
Using 57 382 256 256
Using 57 382 256 256
It is weird; I just copied what you wrote without editing anything.
Could you run this code and see if the last line returns different values?
img = transforms.ToPILImage()(torch.randn(3, 600, 600))
crop = MyRandomCrop((256, 256))
crop.get_params(img, (256, 256))
If so, could you try this afterwards:
cropped = crop(img)
I did that:
for i in range(10):
    img = transforms.ToPILImage()(torch.randn(3, 600, 600))
    crop = MyRandomCrop((256, 256))
    print(crop.get_params(img, (256, 256)))
and I got:
(228, 344, 256, 256)
(75, 317, 256, 256)
(190, 65, 256, 256)
(152, 243, 256, 256)
(74, 312, 256, 256)
(246, 237, 256, 256)
(137, 43, 256, 256)
(29, 186, 256, 256)
(320, 141, 256, 256)
(334, 326, 256, 256)