I’m currently trying to write an autoencoder, but the input and output are both different images. As @ptrblck recommended, I wrote a custom dataloader that takes a csv file, where one column is the paths to the input images, and in the same row, yet the other column, the output file path is located.

Here is my dataloader:

```
class MyDataset():
def __init__(self, csv_file,transform=None):
self.image_paths = pd.read_csv(csv_file, header = 0)
self.transform = transform
def __getitem__(self, index):
#print(self.image_paths[index])
#image_transformed = load_image(self.image_paths[index])
#print(index)
#print(self.image_paths.loc[[index]])
current = self.image_paths.iloc[index]
#print(current.shape)
#image_transformed = current.iloc[1]
image = Image.open(current.iloc[0])
image_transformed = Image.open(current.iloc[1])
#image, image_transformed = load_image(self.image_paths[index])
# transformations, e.g. Random Crop etc.
# Make sure to perform the same transformations on image and target
# Here is a small example: https://discuss.pytorch.org/t/torchvision-transfors-how-to-perform-identical-transform-on-both-image-and-target/10606/7?u=ptrblck
#x, y = TF.to_tensor(image), TF.to_tensor(image_transformed)
x = torch.from_numpy(np.array(image))
y = torch.from_numpy(np.array(image_transformed))
return x, y
def __len__(self):
return len(self.image_paths)
```

I tried the dataloader in a loop, and it seems to be loading the batchs in correctly as a 3d array, with the first dimension representing the number of batchs, and the second and third dimensions representing the image width and height.

Here is my cnn

```
class ConvAutoencoder(nn.Module):
def __init__(self):
super(ConvAutoencoder, self).__init__()
## encoder layers ##
# conv layer (depth from 1 --> 16), 3x3 kernels
self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
# conv layer (depth from 16 --> 4), 3x3 kernels
self.conv2 = nn.Conv2d(16, 4, 3, padding=1)
# pooling layer to reduce x-y dims by two; kernel and stride of 2
self.pool = nn.MaxPool2d(2, 2)
## decoder layers ##
## a kernel of 2 and a stride of 2 will increase the spatial dims by 2
self.t_conv1 = nn.ConvTranspose2d(4, 16, 2, stride=2)
self.t_conv2 = nn.ConvTranspose2d(16, 1, 2, stride=2)
def forward(self, x):
## encode ##
# add hidden layers with relu activation function
# and maxpooling after
x = F.relu(self.conv1(x))
x = self.pool(x)
# add second hidden layer
x = F.relu(self.conv2(x))
x = self.pool(x) # compressed representation
## decode ##
# add transpose conv layers, with relu activation function
x = F.relu(self.t_conv1(x))
# output layer (with sigmoid for scaling from 0 to 1)
x = F.sigmoid(self.t_conv2(x))
return x
```

And my train loop:

```
for epoch in range(1):
for step, (x, y) in enumerate(train_loader):
b_x = x.to(device) # batch x, shape (batch, 750*750)
print(b_x.shape)
b_y = y.to(device) # batch y, shape (batch, 900*800)
encoded, decoded = model(b_x)
loss = loss_func(decoded, b_y) # mean square error
optimizer.zero_grad() # clear gradients for this training step
loss.backward() # backpropagation, compute gradients
optimizer.step() # apply gradients
running_loss += loss.item()
print(running_loss)
```

I get this error: `RuntimeError: Expected 4-dimensional input for 4-dimensional weight [16, 1, 3, 3], but got 3-dimensional input of size [4, 900, 900]. `

How do I change the dimensions of the input from the dataloader from a 3d to a 4d input, so I can train my images with my cnn autoencoder?