RuntimeError: Expected 4-dimensional input for 4-dimensional weight [6, 1, 5, 5], but got 2-dimensional input of size [8, 562500] instead

I’m currently trying to train a CNN on pairs of input and target grayscale images, whose file paths are listed in CSV files.
Here is my code.

import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import numpy as np
import pandas as pd

from PIL import Image
import io



# torch.manual_seed(1)    # reproducible

# Hyper Parameters
EPOCH = 10
BATCH_SIZE = 256
LR = 0.005         # learning rate
DOWNLOAD_MNIST = False
N_TEST_IMG = 5
class MyDataset(Data.Dataset):
	def __init__(self, csv_file, transform=None):
		self.image_paths = pd.read_csv(csv_file, header=0)
		self.transform = transform

	def __getitem__(self, index):
		# Each CSV row holds two file paths: the input image and the target image.
		current = self.image_paths.iloc[index]
		image = Image.open(current.iloc[0])
		image_transformed = Image.open(current.iloc[1])

		# Transformations, e.g. random crop etc., would go here.
		# Make sure to perform the same transformations on image and target.
		# Here is a small example: https://discuss.pytorch.org/t/torchvision-transfors-how-to-perform-identical-transform-on-both-image-and-target/10606/7?u=ptrblck

		x = torch.from_numpy(np.array(image))
		y = torch.from_numpy(np.array(image_transformed))

		return x, y

	def __len__(self):
		return len(self.image_paths)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


# Data loaders for easy mini-batch return in training; each batch of inputs has shape (BATCH_SIZE, 750, 750)
trainDataSet = MyDataset("./train.csv")
testDataSet = MyDataset("./test.csv")
train_loader = Data.DataLoader(trainDataSet, batch_size=BATCH_SIZE, shuffle=True)
test_loader = Data.DataLoader(testDataSet, batch_size=BATCH_SIZE, shuffle=True)


class Autoencoder(nn.Module):
	def __init__(self):
		super(Autoencoder,self).__init__()
		
		self.encoder = nn.Sequential(
			nn.Conv2d(1, 6, kernel_size=5),
			nn.ReLU(True),
			nn.Conv2d(6,16,kernel_size=5),
			nn.ReLU(True))
		self.decoder = nn.Sequential(             
			nn.ConvTranspose2d(16,6,kernel_size=5),
			nn.ReLU(True),
			nn.ConvTranspose2d(6,1,kernel_size=5),
			nn.ReLU(True))
	def forward(self,x):
		x = self.encoder(x)
		x = self.decoder(x)
		return x


autoencoder = Autoencoder()
autoencoder.to(device)
print(autoencoder)


optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LR)
loss_func = nn.MSELoss()

df = pd.DataFrame({'Epoch': [], 'Train': [], 'Test': []})
count = 1
for epoch in range(EPOCH):

	for step, (x, y) in enumerate(train_loader):
		autoencoder.train()
		running_loss = 0.0
		b_x = x.view(-1, 750*750).to(device)   # batch x, shape (batch, 750*750)
		b_y = y.view(-1, 900*800).to(device)   # batch y, shape (batch, 900*800)

		encoded, decoded = autoencoder(b_x)

		loss = loss_func(decoded, b_y)      # mean square error
		optimizer.zero_grad()               # clear gradients for this training step
		loss.backward()                     # backpropagation, compute gradients
		optimizer.step()                    # apply gradients
		running_loss += loss.item()
		

		
		if step % 100 == 0:

			autoencoder.eval()
			test_loss = 0.0
			with torch.no_grad():                   # no gradients needed for evaluation
				for test_x, test_y in test_loader:
					b_x = test_x.view(-1, 750*750).to(device)   # batch x, shape (batch, 750*750)
					b_y = test_y.view(-1, 900*800).to(device)   # batch y, shape (batch, 900*800)

					encoded, decoded = autoencoder(b_x)
					loss = loss_func(decoded, b_y)              # mean square error
					test_loss += loss.item()

			print('Epoch: ', epoch + 1, '| train loss: %.4f' % running_loss, '| test loss: %.4f' % test_loss)

			df.loc[count] = [epoch + 1, running_loss, test_loss]
			count = count + 1


torch.save(autoencoder.state_dict(), "./model.pt")
df.to_csv("./train-test-loss.csv", index = False)

However, I’m getting the error:

Traceback (most recent call last):
  File "train_depth_map.py", line 107, in <module>
    encoded, decoded = autoencoder(b_x)
  File "C:\Users\User\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "train_depth_map.py", line 83, in forward
    x = self.encoder(x)
  File "C:\Users\User\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\User\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\container.py", line 117, in forward
    input = module(input)
  File "C:\Users\User\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\User\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\conv.py", line 419, in forward
    return self._conv_forward(input, self.weight)
  File "C:\Users\User\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\conv.py", line 416, in _conv_forward
    self.padding, self.dilation, self.groups)
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [6, 1, 5, 5], but got 2-dimensional input of size [8, 562500] instead

I don’t understand this error, since it seems like the CNN should be able to produce the 2D output array. Do I need a densely connected layer, and could someone help me calculate the number of inputs that densely connected layer would need to take?

nn.Conv2d expects a 4-dimensional input tensor of shape [batch_size, channels, height, width], while you are flattening the input tensor to 2 dimensions via:

b_x = x.view(-1, 750*750).to(device)
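Instead of flattening, keep the batch in that 4D layout. Here is a minimal sketch of the training step, assuming x and y come out of the DataLoader as [batch, height, width] grayscale tensors (the unsqueeze/float calls are my assumption about your data, not something from your post):

b_x = x.unsqueeze(1).float().to(device)   # [batch, 750, 750] -> [batch, 1, 750, 750]
b_y = y.unsqueeze(1).float().to(device)   # add the channel dim that Conv2d(1, 6, ...) expects

decoded = autoencoder(b_x)                # note: your forward() returns a single tensor
loss = loss_func(decoded, b_y)

You also don’t need a dense layer to get a same-size output: with kernel_size=5 and no padding, each Conv2d shrinks height/width by 4 (750 → 746 → 742 through the encoder), and each ConvTranspose2d grows it back by 4 (742 → 746 → 750 through the decoder). That does mean the target has to be 750×750 as well; a 900×800 target won’t match the decoder output for MSELoss.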

Please, let’s stick to one topic, e.g. this one.

Okay, thank you! In the future, if I post a question and then make major changes, should I delete the question and post a new one, or should I edit the original? For example, this time I tried a completely new training loop.

Since these errors seem to be related, you could post potential workarounds or what you’ve tried to debug the issue in the same thread. Anyway, if you think the major changes create another issue, just post a new thread and, if in doubt, we’ll link to the other one. :wink: