RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 5, 5], but got 2-dimensional input of size [10000, 4761] instead

Hi,

I’m new to pytorch,
trying to use a simple cnn model on my images.
However, seems like I’m not passing the correct input dimensions.

How do I correct that? and to what shape? (code example will be great)

Thanks!


Code below:

import torch.nn as nn

import torch.nn.functional as F

class Net(nn.Module):

def __init__(self):

    super(Net,self).__init__()

    self.layer1 = nn.Sequential(

        nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),

        nn.ReLU(),

        nn.MaxPool2d(kernel_size=2, stride=2))

    self.layer2 = nn.Sequential(

        nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),

        nn.ReLU(),

        nn.MaxPool2d(kernel_size=2, stride=2))

    self.drop_out = nn.Dropout()

    self.fc1 = nn.Linear(69*69, 100)

    self.fc2 = nn.Linear(100, 10)

def forward(self,x):

 out = self.layer1(x)

 out = self.layer2(out)

 out = out.reshape(out.size(0), -1)

 out = self.drop_out(out)

 out = self.fc1(out)

 out = self.fc2(out)

 return out

tranining_tensor = torch.stack([training_ds[i][0] for i in range(len(training_ds)) ])

training_labels = training_ds.labels.data.numpy()

validation_tensor = torch.stack([validation_ds[i][0] for i in range(len(validation_ds)) ])

validation_labels = validation_ds.labels.data.numpy()


def dataset_accuracy(tensor_dataset,targets,net,loss_func):

net.eval()

output = net(tensor_dataset)

loss = loss_func(output,torch.LongTensor(targets))

output = output.cpu().data.numpy()

prediction = np.argmax(output,axis=1)

n_correct = len(np.where(targets==prediction)[0])

n_total = len(targets)

return n_correct/n_total, loss.item()

This is where I get the error:

dataset_accuracy(tranining_tensor,training_labels, net, loss_func)

The input to nn.Conv2d is expected to have the shape [batch_size, channels, height, width], while you are trying to pass a tensor in the shape [10000, 4761] to the model.

You could reshape the input tensor via:

tranining_tensor = tranining_tensor.view(-1, 1, 69, 69)

before passing it to the model.
However, since your batch size seems to be 10000, you might run out of memory quickly and might want to use a Dataset and DataLoader as described in this tutorial.

Also, the in_features on self.fc1 seem to be wrong, since you are downsampling the activation with the pooling layers, so you might need to change it to self.fc1 = nn.Linear(64*17*17, 100).
You might also want to add an activation function between self.fc1 and self.fc2.

Thanks for the quick and very detailed response!
I will try your recommendations and will update on my progress.
Yaniv

Hi,

The training session did crash when using => tranining_tensor = tranining_tensor.view(-1, 1, 69, 69)

What should I change in my CustomDataset? (see below dataloader & CustomDataset code)

See dataloader code below:

training_dataloader = DataLoader(training_ds,batch_size=100, shuffle= True)
valid_dataloader = DataLoader(validation_ds,batch_size=100, shuffle= True)

See CustomDataset code below:

from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):

def __init__(self, path, transform=False, mean=0.092, std=0.127, n_classes=10):

    

    self.transform = transform

    self.path = path

    self.mean = mean

    self.std = std 

    self.filelist = glob.glob(path + '/*.png')

    ##Start code##

    Temp_labels = np.zeros(len(self.filelist))

    for class_i in range(n_classes):

        Temp_labels[ np.array(['class'+str(class_i) in x for x in self.filelist]) ] = class_i

    self.labels = torch.LongTensor(Temp_labels)

    ##End code##

def __len__(self):

   

    return len(self.filelist)

def __getitem__(self, idx):

    if (self.transform):

     img = Image.open(filelist[idx])

     x = transforms.RandomRotation(180)(img) 

     x = torchvision.transforms.Normalize(mean = self.mean, std = self.std)

     x = torchvision.transforms.ToTensor()(img).view(-1)

     y = self.labels[idx]

     return x, y

    else:

     img = Image.open(filelist[idx])

     x =  torchvision.transforms.ToTensor()( img ).view(-1)

     x = (x-self.mean)/self.std

     y = self.labels[idx]

     return x, y

Tried adding to CustomDataset:
def getitem (self, idx):
if (self.transform):
img = Image.open(filelist[idx])
x = transforms.RandomRotation(180)(img)
x = torchvision.transforms.Normalize(mean = self.mean, std = self.std)
x = torchvision.transforms.ToTensor()(img).view(-1)
x = x.view(-1, 1, 69, 69)
y = self.labels[idx]
return x, y

but got:
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 5, 5], but got 5-dimensional input of size [10000, 1, 1, 69, 69] instead

Now getting this:
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [1, 1, 69, 69], but got 5-dimensional input of size [10001, 1, 1, 69, 69] instead

How do I take 1 dimension down from the input? how to I remove the 10001 from the input?

Thanks!

Since your batch size is set to 100 in the DataLoader, you shouldn’t get 10000 or 10001 samples in a single batch.
Could you post the complete code by wrapping it into three backticks ```?

Also, since you are already dealing with images, you don’t need to flatten it inside the CustomDataset and the n reshaping it back to the image tensor shape.

import PIL
import numpy as np
import torch
import torchvision

path_to_training_data = '/content/gdrive/My Drive/H1/Dataset/train'

path_to_validation_data = '/content/gdrive/My Drive/H1/Dataset/validation/'

from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):

    def __init__(self, path, transform=False, mean=0.092, std=0.127, n_classes=10):
        
        self.transform = transform
        self.path = path
        self.mean = mean
        self.std = std 
        self.filelist = glob.glob(path + '/*.png')
        ##Start code##
        Temp_labels = np.zeros(len(self.filelist))
        for class_i in range(n_classes):
            Temp_labels[ np.array(['class'+str(class_i) in x for x in self.filelist]) ] = class_i
        self.labels = torch.LongTensor(Temp_labels)
        ##End code##

    def __len__(self):
       
        return len(self.filelist)


    def __getitem__(self, idx):
        if (self.transform):
         img = Image.open(filelist[idx])
         x = transforms.RandomRotation(180)(img) 
         x = torchvision.transforms.Normalize(mean = self.mean, std = self.std)
         x = torchvision.transforms.ToTensor()(img).view(-1)
         x = x.view(-1, 1, 69, 69)
         y = self.labels[idx]

         return x, y

        else:
         img = Image.open(filelist[idx])
         x =  torchvision.transforms.ToTensor()( img ).view(-1)
         x = (x-self.mean)/self.std
         y = self.labels[idx]

         return x, y


training_ds = CustomDataset(path_to_training_data, transform=True)
validation_ds = CustomDataset(path_to_validation_data)


## lets also go ahead and create the data loaders,
## and set the batch size
training_dataloader = DataLoader(training_ds,batch_size=100, shuffle= True)
valid_dataloader = DataLoader(validation_ds,batch_size=100, shuffle= True)

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 1, kernel_size=69, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(64*17*17, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self,x):
        
     out = self.layer1(x)
     out = self.layer2(out)
     out = out.reshape(out.size(0), -1)
     out = self.drop_out(out)
     out = nn.relu(self.fc1(out))
     out = nn.relu(self.fc2(out))

     return out


## create the instance

net = Net()

loss_func = nn.CrossEntropyLoss()

tranining_tensor = torch.stack([training_ds[i][0] for i in range(len(training_ds))])
training_labels = training_ds.labels.data.numpy()

validation_tensor = torch.stack([validation_ds[i][0] for i in range(len(validation_ds)) ])
validation_labels = validation_ds.labels.data.numpy()


def dataset_accuracy(tensor_dataset,targets,net,loss_func):
    net.eval()
    output = net(tensor_dataset)
    
    loss = loss_func(output,torch.LongTensor(targets))

    output = output.cpu().data.numpy()
    prediction = np.argmax(output,axis=1)

    n_correct = len(np.where(targets==prediction)[0])
    n_total = len(targets)

    return n_correct/n_total, loss.item()

import torch.optim as optim

optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-04, weight_decay=1e-5, amsgrad=False)

from tqdm.notebook import tqdm

n_epochs = 50

loss_vs_epoch = []
accuracy_vs_epoch = []

for epoch in tqdm( range(n_epochs) ):
    
    net.train()
    for x,y in training_dataloader:
        optimizer.zero_grad()
        output = net(x)
        training_loss = loss_func(output, y)
        training_loss.backward()  
        optimizer.step()
    net.eval()
    with torch.no_grad():
      for X_batch, y_batch in valid_dataloader:
        y_pred = net(X_batch)
        validation_loss = loss_func(y_pred, y_batch)

    training_accuracy, training_loss = dataset_accuracy(tranining_tensor,training_labels, net, loss_func)    
    validation_accuracy, validation_loss = dataset_accuracy(validation_tensor,validation_labels, net, loss_func)

    loss_vs_epoch.append([training_loss, validation_loss])
    accuracy_vs_epoch.append([training_accuracy, validation_accuracy])
    print("epoch:", epoch, " training_accuracy:",training_accuracy, " training_loss:", training_loss)
    print("epoch:", epoch, " validation_accuracy:",validation_accuracy, " validation_loss:", validation_loss)

Sure. added above.
Thanks!

Hi man,
any updates,
this is kinda urgent…
Thanks again :slight_smile:
Yaniv

I’m not sure, what I had to change, as there were a lot of minor issues, but this code should work:

class CustomDataset(Dataset):
    def __init__(self, mean=0.092, std=0.127, n_classes=10):
        self.mean = mean
        self.std = std 

    def __len__(self):
        return 100

    def __getitem__(self, idx):
        img = transforms.ToPILImage()(torch.randn(69, 69))
        x = transforms.RandomRotation(180)(img) 
        x = torchvision.transforms.Normalize(mean = self.mean, std = self.std)
        x = torchvision.transforms.ToTensor()(img).view(-1)
        x = x.view(1, 69, 69)
        y = torch.randint(0, 10, (1,))
        return x, y


class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(64*17*17, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self,x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        print(out.shape)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))

        return out


def dataset_accuracy(tensor_dataset,targets,net,loss_func):
    net.eval()
    output = net(tensor_dataset)
    
    loss = loss_func(output,torch.LongTensor(targets))

    output = output.cpu().data.numpy()
    prediction = np.argmax(output,axis=1)

    n_correct = len(np.where(targets==prediction)[0])
    n_total = len(targets)

    return n_correct/n_total, loss.item()


training_ds = CustomDataset()
training_dataloader = DataLoader(training_ds,batch_size=100, shuffle= True)

## create the instance
net = Net()
loss_func = nn.CrossEntropyLoss()
tranining_tensor = torch.stack([training_ds[i][0] for i in range(len(training_ds))])
training_labels = torch.randint(0, 10, (100,))

optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-04, weight_decay=1e-5, amsgrad=False)
n_epochs = 1
loss_vs_epoch = []
accuracy_vs_epoch = []

for epoch in range(n_epochs):
    net.train()
    for x,y in training_dataloader:
        optimizer.zero_grad()
        output = net(x)
        training_loss = loss_func(output, y.squeeze())
        training_loss.backward()  
        optimizer.step()

    training_accuracy, training_loss = dataset_accuracy(tranining_tensor,training_labels, net, loss_func)    

You can check the shapes and make sure your data is loaded and processed in the same shape.