Self-defined function for data augmentation

I have defined an image-flipping class, and I am wondering whether this class will actually flip images during training.

#flip image
class HorizontallyFlipline(object):
    """Conditionally flip an image and its normalized x-coordinates horizontally.

    The flip is applied only when the last coordinate of ``xx3`` or ``xx4``
    lies more than 0.1 away from the image centre (0.5).  When flipped, the
    normalized coordinates are mirrored as ``1 - x``.
    """

    # Bug fix: in the original paste, __call__ was not indented under the
    # class, which is an IndentationError.
    def __call__(self, img, xx3, xx4):
        if abs(xx3[-1] - 0.5) > 0.1 or abs(xx4[-1] - 0.5) > 0.1:
            # Mirror the image and reflect the normalized coords about 0.5.
            return img.transpose(Image.FLIP_LEFT_RIGHT), 1 - xx3, 1 - xx4
        return img, xx3, xx4

My dataset for pytorch is:

self.mytransforms=HorizontallyFlipline()
def __getitem__(self,index):
...
if self.mirror:
     img,lp3,lp4=self.mytransforms(img,lp3,lp4) # will this flip the image during training?
...

What is img? An np array or PIL image or ?

Can’t you use torchvision.transforms.functional.hflip(img)?

I want to segment the image. lp3 and lp4 are the point coordinates used for ground-truth generation, so lp3 and lp4 need to be flipped whenever the image is flipped. I'm wondering whether this code snippet actually augments the data during training.

#flipping image class
class HorizontallyFlipline(object):
    """Mirror an image together with its normalized x-coordinate arrays.

    The flip fires only for off-centre samples: when the last coordinate of
    either array is more than 0.1 away from 0.5.  Mirrored coordinates are
    reflected about the centre as ``1 - x``.
    """

    def __call__(self, img, xx3, xx4):
        off_centre = max(abs(xx3[-1] - 0.5), abs(xx4[-1] - 0.5)) > 0.1
        if not off_centre:
            # Sample is roughly centred: leave image and coordinates alone.
            return img, xx3, xx4
        # Flip the image left-right and reflect the normalized coordinates.
        flipped = img.transpose(Image.FLIP_LEFT_RIGHT)
        return flipped, 1 - xx3, 1 - xx4
class VPDatasetline(data.Dataset):
    """Line-segmentation dataset.

    ``DataType`` is 'train', 'val' or 'test' for the train, validation and
    test splits.  Each train/val sample is ``(image_tensor, mask, name)``:
    the mask is a 300x300 binary image built by drawing thick bands along
    the annotated point chains.

    Args:
        root: dataset root directory holding the split folders and CSVs.
        transform: optional callable applied to the PIL image; when None a
            default Resize(300,300) -> ToTensor -> Normalize pipeline is used.
        mirror: when True, samples may be horizontally flipped
            (augmentation) -- enable for training only.
        DataType: which split to load ('train', 'val' or 'test').
    """

    def __init__(self, root, transform=None, mirror=False, DataType='train'):
        super(VPDatasetline, self).__init__()
        self.mytransforms = HorizontallyFlipline()
        self.mirror = mirror
        # Bug fix: the original compared strings with 'is' (identity), which
        # only works by accident of CPython interning; use '==' instead.
        if DataType == 'train':
            self.Folder = os.path.join(root, 'train')
            # 'with' guarantees the CSV handle is closed (original leaked it).
            with open(os.path.join(root, 'train593line.csv'), 'r') as csv_file:
                reader = csv.DictReader(csv_file)
                # NOTE(review): DictReader already consumes the header row, so
                # this next() skips the FIRST DATA ROW.  Kept for backward
                # compatibility -- confirm the CSV really has two header lines.
                next(reader)
                self.file_list = [item for item in reader]
        elif DataType == 'val':
            self.Folder = os.path.join(root, 'val')
            with open(os.path.join(root, 'valine.csv'), 'r') as csv_file:
                reader = csv.DictReader(csv_file)
                next(reader)  # see NOTE above: skips the first data row
                self.file_list = [item for item in reader]
        elif DataType == 'test':
            testFolder = os.path.join(root, 'test')
            self.imgs = [os.path.join(testFolder, img) for img in os.listdir(testFolder)]
            self.file_list = os.listdir(testFolder)
        if transform is None:
            self.transforms = T.Compose(
                [T.Resize((300, 300)),
                 T.ToTensor(),
                 T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
        else:
            # Bug fix: the original silently ignored a caller-supplied
            # transform, leaving self.transforms unset and crashing later
            # in __getitem__.
            self.transforms = transform

    def _draw_band(self, imgg, xs, ys, of):
        """Draw a band of half-width ``of`` px along the polyline (xs, ys)
        onto the mask image ``imgg``, filled with label value 1."""
        draw = ImageDraw.Draw(imgg)  # hoisted: one Draw object, not one per segment
        for i in range(len(xs) - 1):
            p1 = (xs[i] - of, ys[i])
            p2 = (xs[i] + of, ys[i])
            p3 = (xs[i + 1] - of, ys[i + 1])
            p4 = (xs[i + 1] + of, ys[i + 1])
            draw.polygon((p1, p2, p4, p3), outline=1, fill=1)

    def __getitem__(self, index):
        """Return ``(image_tensor, mask, image_name)`` for the given index."""
        result = self.file_list[index]
        # Read the image.
        img_index = result['name']
        img = Image.open(os.path.join(self.Folder, img_index))
        w, h = img.size
        # Blank 300x300 'L' image that receives the semantic ground truth.
        imgg = Image.new('L', (300, 300), 0)
        # Normalize coordinates to [0, 1].
        # SECURITY: eval() on CSV fields executes arbitrary code if the CSV
        # is untrusted -- consider ast.literal_eval instead.
        lp3 = np.array(eval(result['x3'])) / w
        rp3 = np.array(eval(result['y3'])) / h
        lp4 = np.array(eval(result['x4'])) / w
        rp4 = np.array(eval(result['y4'])) / h
        of = 10  # half-width (px) of the drawn ground-truth band

        if self.mirror:
            # Conditional horizontal flip of the image and x-coordinates
            # (augmentation; enable mirror only during training).
            img, lp3, lp4 = self.mytransforms(img, lp3, lp4)

        img = self.transforms(img)
        # Scale normalized coordinates onto the 300x300 mask grid.
        lp3, rp3 = (lp3 * 300).astype(int), (rp3 * 300).astype(int)
        lp4, rp4 = (lp4 * 300).astype(int), (rp4 * 300).astype(int)
        # Generate the polygon ground truth for both point chains
        # (decomposed: the two identical loops now share _draw_band).
        self._draw_band(imgg, lp3, rp3, of)
        self._draw_band(imgg, lp4, rp4, of)
        # Ground-truth mask as a tensor.
        mask = torch.from_numpy(np.array(imgg))
        return img, mask, img_index

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.file_list)

Assuming that you set self.mirror only during training, the code looks alright to me.

I am confused about how PyTorch actually realizes data augmentation during training.

What would you want to know about data augmentation? How it is used in your code or how the underlying implementation is working?
In the former case, you could just use torchvision.transforms to create your transformations and apply them in the Dataset.
In the latter case you might want to look at the source to see the implementation. Let me know, if you need some more information.

I don’t understand how PyTorch preserves the original image and then creates a flipped image for data augmentation during training. I think the transform operation in the Dataset doesn’t preserve the original image. How does PyTorch identify that the flipping operation is for data augmentation and not for something else?

Usually you load your images lazily, so that your transformations will only be applied on the loaded image.
However even if you preload your images and apply a transformation like transforms.RandomHorizontalFlip( ) internally PIL.Image.transpose(Image.FLIP_LEFT_RIGHT) will be called, which returns a flipped version of the image so that the original image will still be preserved.

But if you want to integrate data augmentation in the __getitem__ function, how can you return both the original and the transformed image, in order to actually increase the number of samples?

You could return the transformed and the original image tensor, which would increase the batch size.
Alternatively, you could randomly apply the transformation with a probability of 0.5 and double the length of the Dataset in its __len__ method.