i followed your advice and performed augmentation manually inside the **getitem** function of dataset class.

still i was confused how to perform validation split because i supply the random_split function with the custom dataset class that as i said already performs augmentation.

further, just to check for other errors, i tried to run the program with both validation and and train being augmented.*** there is a strange error occuring after TF.rotate(mask) that adds non zeros to the mask. i can’t find the exact location but it’s somewhere in the transform to pil or roatate.***

```
import os
import numpy as np
from pandas.io.parsers import read_csv
from sklearn.utils import shuffle
import torch
from torch.utils.data import Dataset
import torchvision.transforms.functional as TF
class FacialKeypoints(Dataset):
def __init__(self, test=False, cols=None,FTRAIN = 'data/Q3/training.csv', FTEST = 'EX1/Q3/test.csv', transform_vars=None):
fname = FTEST if test else FTRAIN
df = read_csv(os.path.expanduser(fname)) # load pandas dataframe
# The Image column has pixel values separated by space; convert
# the values to numpy arrays:
df['Image'] = df['Image'].apply(lambda im: np.fromstring(im, sep=' '))
if cols: # get a subset of columns
df = df[list(cols) + ['Image']]
print('number of values in each column: ', df.count()) # prints the number of values for each column
df = df.dropna() # drop all rows that have missing values in them
X = np.vstack(df['Image'].values) / 255. # scale pixel values to [0, 1]
X = X.astype(np.float32)
image_size = int(np.sqrt(X.shape[1]))
Y = []
if not test: # only FTRAIN has any target columns
y = df[df.columns[:-1]].values
y2 = y.reshape(y.shape[0],15,2)
for coords in y2:
mask = np.zeros((image_size,image_size))
for pair in coords:
pair = pair.round().astype(int)
mask[pair[1]-1,pair[0]-1]=1
Y.append(mask)
Y = np.array(Y)
y = (y - 48) / 48 # scale target coordinates to [-1, 1]
X, y, Y = shuffle(X, y, Y, random_state=42) # shuffle train data
y = y.astype(np.float32)
else:
y = None
self.X = torch.tensor(X,dtype=torch.float32)
self.transform_vars = transform_vars
self.y = torch.tensor(y)
self.Y = torch.tensor(Y,dtype=torch.float32)
print('finished loading')
def __len__(self):
return len(self.X)
def transform(self,image, mask):
image = image.reshape(96,96)
flip_prob = self.transform_vars['flip_probability']
rotate_prob = self.transform_vars['rotate_probability']
print('before',torch.nonzero(mask, as_tuple=False).reshape(-1).shape[0])
if torch.rand(1)>flip_prob:
image = TF.hflip(image)
mask = TF.hflip(mask)
if torch.rand(1)<rotate_prob:
avg_pixel = image.mean()
degrees = self.transform_vars['degrees']
deg = int(torch.rand(1).item() * degrees - degrees)
image_r = TF.to_tensor(TF.rotate(TF.to_pil_image(image),deg)).squeeze()
image_r[(image_r==0) * (image!=0)] = avg_pixel
image = image_r
mask = TF.to_pil_image(mask)
print('after pil', mask.ImageStat.sum)
mask = TF.rotate(mask, deg)
print('after rotate', mask.ImageStat.sum)
mask = TF.to_tensor(mask).squeeze()
#mask = TF.to_tensor(TF.rotate(TF.to_pil_image(mask), deg)).squeeze()
print('after tensor',torch.nonzero(mask, as_tuple=False).reshape(-1).shape[0])
return image, mask
def update_target(self,mask):
keypoints = torch.nonzero(mask,as_tuple=False).reshape(-1)
keypoints = torch.from_numpy((keypoints.numpy() - 48) / 48)
return keypoints
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
image = self.X[idx]
keypoints = self.y[idx]
mask = self.Y[idx]
if self.transform_vars['is']:
image, mask = self.transform(image, mask)
keypoints = self.update_target(mask)
print(keypoints.shape)
return {'image':image, 'keypoints':keypoints}
else:
return {'image':image,'keypoints':keypoints}
```

this is the dataset class that loads the data, turns image string to values, removes nans and forms a “mask” which is a matrix of zeros and ones where there should be a facial keypoint. in the augmentation part, both the image and the mask go through the same transformations and then the mask goes through one more transformation (updateKeypoint) to become a vector of size 30 which is the target.

the main script:

```
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from preprocess import FacialKeypoints
import numpy as np
from torch.utils.data.dataset import random_split
transformed_dataset = FacialKeypoints(transform_vars={'is':True,'degrees':20,'flip_probability':0.5,'rotate_probability':0.8})
num_train = int(np.ceil(len(transformed_dataset) * 0.85))
num_val = int(len(transformed_dataset) - num_train)
batch_size = 16
trainset,valset = random_split(transformed_dataset,[num_train,num_val])
trainloader = DataLoader(trainset, batch_size=batch_size,
shuffle=True, num_workers=0)
valoader = DataLoader(valset, batch_size=batch_size,
shuffle=True, num_workers=0)
device = torch.device('cuda')
model2 = nn.Sequential(
nn.Conv2d(1,32,3),
nn.MaxPool2d(kernel_size=2,stride=2),
nn.ReLU(),
nn.Conv2d(32,64,2),
nn.MaxPool2d(kernel_size=2,stride=2),
nn.ReLU(),
nn.Conv2d(64,128,2),
nn.MaxPool2d(kernel_size=2,stride=2),
nn.ReLU(),
nn.Flatten(),
nn.Linear(15488,500),
nn.ReLU(),
nn.Linear(500,500),
nn.ReLU(),
nn.Linear(500,30)
)
model2.to(device)
total_loss = {'train':[],'val':[]}
criterion1 = nn.MSELoss()
criterion2 = nn.MSELoss()
optimizer2 = torch.optim.Adam(model2.parameters(),lr=0.001)
total_loss = {'train':[],'val':[]}
for epoch in range(100):
print('in epoch {}/100 :'.format(epoch+1))
for sample in trainloader:
losses = []
input = sample['image'].to(device)
batch = input.shape[0]
target = sample['keypoints'].to(device)
optimizer2.zero_grad()
output = model2(input)
loss2 = criterion2(output,target)
loss2.backward()
optimizer2.step()
losses.append(loss2.data)
a = np.sum(losses)
total_loss['train'].append(a)
print('train loss = {}'.format(a))
for sample in valoader:
with torch.no_grad():
losses = []
input = sample['image'].to(device)
batch = input.shape[0]
input = input.view([batch, 1, 96, 96])
target = sample['keypoints'].to(device)
output = model2(input)
loss2 = criterion2(output, target)
losses.append(loss2.data)
a = np.sum(losses)
total_loss['val'].append(a)
'''def check_sample(loader=valoader,model=model2,device=device):
device2 = torch.device('cpu')
plots = 16//3
x = next(iter(loader))
y_true = x['keypoints']
y_true = y_true.reshape(16,15,2)
x = x['image'].to(device)
x = x.view(16,1,96,96)
y = model(x)
y = y.reshape(16,15,2).to(device2)
x = x.to(device2)
fig,ax = plt.subplots(3,plots)
k=0
for i in range(plots):
for j in range(3):
ax[i,j].scatter(y_true[k,:,0].detach().numpy(),y_true[k,:,1].detach().numpy())
ax[i,j].imshow(x[k].squeeze())
ax[i,j].scatter(y[k,:,0].detach().numpy(),y[k,:,1].detach().numpy())
k=k+1
plt.show()
'''
```

finally i get a weird error about the size of batch, which i suspect is because of the target changing size.

line 55, in default_collate

return torch.stack(batch, 0, out=out)

RuntimeError: stack expects each tensor to be equal size, but got [30] at entry 0 and [26] at entry 3

would really appreciate any hint what could cause this error, and and also how to augment the data only for training and not for validation