RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [1, 784]


I am getting this error and I can’t understand why.

I also tried the input in shape [batch_size, channels, height, width], as suggested by @ptrblck in another topic, but it shows another RuntimeError: shape '[256, -1, 28, 28]' is invalid for input of size 784.

As you correctly said, nn.Conv2d expects a 3D (unbatched) or 4D (batched) tensor in the shape [batch_size, channels, height, width] while you are flattening it to a 2D tensor.
How did you reshape the tensor to the 4D one and how did you create a negative shape?
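
For reference, a minimal sketch of un-flattening such a [1, 784] tensor back into the 4D layout, assuming it represents single-channel 28x28 images:

import torch
from torch import nn

x_flat = torch.randn(1, 784)        # flattened input, as in the error message
x = x_flat.view(-1, 1, 28, 28)      # [batch_size, channels, height, width] = [1, 1, 28, 28]

conv = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
out = conv(x)
print(out.shape)
# torch.Size([1, 6, 26, 26])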

Thanks for your reply.
Actually, I am a beginner in PyTorch and am exploring it through several tutorials and the documentation. I am confused by your questions about 'reshaping the tensor to the 4D one' and the 'negative shape'.
Though -1 and 1 both produce the same result here (I don’t know why!)
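
(Side note on the -1: in view/reshape, PyTorch infers a -1 dimension from the remaining number of elements, which is why -1 and 1 end up giving the same shape for a 784-element tensor. A minimal check:)

import torch

x = torch.randn(1, 784)             # 784 elements in total
print(x.view(1, -1, 28, 28).shape)  # -1 is inferred as 784 / (1 * 28 * 28) = 1
# torch.Size([1, 1, 28, 28])
print(x.view(1, 1, 28, 28).shape)   # explicit 1 gives the same shape
# torch.Size([1, 1, 28, 28])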

I’ve attached some snippets from where I came to this point!

transform = test_transform = transforms.Compose([
        transforms.Resize(28),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
])
import glob
i=2
test_images = glob.glob(TEST_PATH+"/*")
img = Image.open(test_images[i]).convert('L')
img = transform(img)

The original error was raised because of the flattening. However, I don’t know what kind of code changes you’ve applied to try to fix it as you haven’t posted them.
Here is an example of a working approach using 3D and 4D input tensors:

transform = test_transform = transforms.Compose([
        transforms.Resize(28),
        transforms.ToTensor(),
        transforms.Normalize([0.,], [1.])
])

img = transforms.ToPILImage()(torch.randint(0, 256, (1, 224, 224), dtype=torch.uint8))
x = transform(img)
print(x.shape)
# torch.Size([1, 28, 28])

model = nn.Conv2d(1, 6, 3)

# unbatched
out = model(x)
print(out.shape)
# torch.Size([6, 26, 26])

# batched
x = x.unsqueeze(0)
print(x.shape)
# torch.Size([1, 1, 28, 28])
out = model(x)
print(out.shape)
# torch.Size([1, 6, 26, 26])

@sajid_snta
Is your problem solved?
I am facing the same error.

Hi, I am also trying to use Conv2d, and I am working with signals. I am getting the same error that a batched input is expected to be 4D. Could you suggest a change? I am new to learning PyTorch.

class ConvNet(nn.Module):
    def __init__(self, num_classes=1, dropout=0.0):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.ZeroPad2d((15, 15, 0, 0)),
            nn.Conv2d(in_channels=1, out_channels=20, kernel_size=(1, 31), stride=(1, 1), padding=0),
            nn.LeakyReLU(),
            nn.Dropout(p=dropout))
        #self.layer2 = nn.Sequential(
        #    nn.Conv2d(in_channels=20, out_channels=40, kernel_size=(2, 1), stride=(2, 1), padding=0),
        #    nn.BatchNorm2d(40, affine=False),
        #    nn.LeakyReLU(),
        #    nn.MaxPool2d(kernel_size=(1, 3), stride=(1, 2))
        #    )
        #self.layer3 = nn.Sequential(
        #    nn.Conv2d(in_channels=40, out_channels=80, kernel_size=(1, 21), stride=(1, 1)),
        #    nn.LeakyReLU(),
        #    nn.Dropout(p=dropout))
        #self.pool2 = nn.Sequential(
        #    nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 2)))
        #self.layer4 = nn.Sequential(
        #    nn.ZeroPad2d((15, 15, 0, 0)),
        #    nn.Conv2d(in_channels=80, out_channels=160, kernel_size=(1, 11), stride=(1, 1)),
        #    nn.BatchNorm2d(160, affine=False),
        #    nn.LeakyReLU(),
        #    nn.Dropout(p=dropout))
        #self.pool3 = nn.Sequential(
        #    nn.MaxPool2d(kernel_size=(1, 3), stride=(1, 3)))
        #self.layer5 = nn.Sequential(
        #    nn.Conv2d(in_channels=160, out_channels=160, kernel_size=(7, 1), stride=(7, 1)),
        #    nn.BatchNorm2d(160, affine=False),
        #    nn.LeakyReLU())
        #self.pool4 = nn.Sequential(
        #    nn.MaxPool2d(kernel_size=(1, 3), stride=(1, 3)))
        self.linear1 = nn.Sequential(
            nn.Linear(160 * 4, num_classes),
            nn.LogSoftmax())

data = setNorm(data) #normalize the data
X_train = x_noisy[:60000] # perform the test/train split
Y_train = PPG[:60000]
X_test = x_noisy[60000:]
Y_test = PPG[60000:]

Hyper parameters

num_classes = 1
learning_rate = 0.0003
weight_decay=0.003
batch_size = 100

def trainModel(net, optimizer, num_epochs, noise):
    trainAcc = []
    testAcc = []
    for epoch in range(num_epochs):
        print("\n Epoch: ", epoch)

        X_epoch, Y_epoch, shufPerm = shuffleData(X_train, Y_train)

        if noise != 0:
            X_epoch = addNoise(X_epoch, noise)
            X_epoch = setNorm(X_epoch)
        running_loss = 0.0
        for i in range(int(len(X_epoch)/batch_size-1)):
            s = i*batch_size
            e = i*batch_size+batch_size

            print("Start index:{}, End index:{}".format(s, e))
            inputs = X_epoch[s:e].unsqueeze(1).type(torch.FloatTensor)
            labels = Y_epoch[s:e]
            print("inputs shape:{}, labels shape:{}".format(inputs.shape, labels.shape))

            #inputs, labels = Variable(inputs.cuda(0)), Variable(labels.type(torch.LongTensor).cuda(0))
            inputs, labels = Variable(inputs), Variable(labels.type(torch.LongTensor))

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()

            optimizer.step()

            running_loss += float(loss.item())
            del loss
            del labels
            del inputs
            del outputs
        params = ["acc", "auc", "fmeasure"]
        print(params)
        print("Training Loss ", running_loss)
        trainAcc.append(testModel(net, X_train, Y_train))
        testAcc.append(testModel(net, X_test, Y_test))
    return trainAcc, testAcc

Define the loss function and optimizer

loss_fn = nn.MSELoss()
net = ConvNet(num_classes=num_classes,dropout=0.0)
optimizer = Adam(net.parameters(), lr=learning_rate, weight_decay = weight_decay)

[trainAcc, testAcc]=trainModel(net, optimizer, num_epochs=100, noise=0)

Could you post the input shape you are using?
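
In the meantime, a minimal sketch of the 4D layout this model needs, assuming the signals are stored as a [num_samples, signal_length] float tensor (the names and sizes below are placeholders):

import torch

num_samples, signal_length = 1000, 128        # placeholder sizes
signals = torch.randn(num_samples, signal_length)

batch = signals[:100]                         # [100, 128]
batch = batch.unsqueeze(1).unsqueeze(1)       # [batch_size, channels, height, width] = [100, 1, 1, 128]

conv = torch.nn.Conv2d(in_channels=1, out_channels=20, kernel_size=(1, 31))
out = conv(batch)
print(out.shape)
# torch.Size([100, 20, 1, 98])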

@ptrblck

I am also having the same issue -

item_tfms = Resize(460)
batch_tfms = [*aug_transforms(size=224), Normalize.from_stats(*imagenet_stats)]

dls = ImageDataLoaders.from_folder(path, valid_pct=0.2, item_tfms=item_tfms, batch_tfms=batch_tfms)

model = cnn_learner(dls, models.inception_v3, metrics=accuracy)

Looking at the error, it seems the input size is not as expected.

/usr/local/lib/python3.10/dist-packages/fastai/vision/learner.py:301: UserWarning: `cnn_learner` has been renamed to `vision_learner` -- please update your code
  warn("`cnn_learner` has been renamed to `vision_learner` -- please update your code")
/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
  warnings.warn(
/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=Inception_V3_Weights.IMAGENET1K_V1`. You can also use `weights=Inception_V3_Weights.DEFAULT` to get the most up-to-date weights.
  warnings.warn(msg)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-14-b71b1c4c483f> in <cell line: 1>()
----> 1 model = cnn_learner(dls, models.inception_v3, metrics=accuracy)

20 frames
/usr/local/lib/python3.10/dist-packages/fastai/vision/learner.py in cnn_learner(*args, **kwargs)
    300     "Deprecated name for `vision_learner` -- do not use"
    301     warn("`cnn_learner` has been renamed to `vision_learner` -- please update your code")
--> 302     return vision_learner(*args, **kwargs)
    303 
    304 # %% ../../nbs/21_vision.learner.ipynb 62

/usr/local/lib/python3.10/dist-packages/fastai/vision/learner.py in vision_learner(dls, arch, normalize, n_out, pretrained, weights, loss_func, opt_func, lr, splitter, cbs, metrics, path, model_dir, wd, wd_bn_bias, train_bn, moms, cut, init, custom_head, concat_pool, pool, lin_ftrs, ps, first_bn, bn_final, lin_first, y_range, **kwargs)
    234     else:
    235         if normalize: _add_norm(dls, meta, pretrained, n_in)
--> 236         model = create_vision_model(arch, n_out, pretrained=pretrained, weights=weights, **model_args)
    237 
    238     splitter = ifnone(splitter, meta['split'])

/usr/local/lib/python3.10/dist-packages/fastai/vision/learner.py in create_vision_model(arch, n_out, pretrained, weights, cut, n_in, init, custom_head, concat_pool, pool, lin_ftrs, ps, first_bn, bn_final, lin_first, y_range)
    172         model = arch(pretrained=pretrained)
    173     body = create_body(model, n_in, pretrained, ifnone(cut, meta['cut']))
--> 174     nf = num_features_model(nn.Sequential(*body.children())) if custom_head is None else None
    175     return add_head(body, nf, n_out, init=init, head=custom_head, concat_pool=concat_pool, pool=pool,
    176                     lin_ftrs=lin_ftrs, ps=ps, first_bn=first_bn, bn_final=bn_final, lin_first=lin_first, y_range=y_range)

/usr/local/lib/python3.10/dist-packages/fastai/callback/hook.py in num_features_model(m)
     97         except Exception as e:
     98             sz *= 2
---> 99             if sz > 2048: raise e
    100 
    101 # %% ../../nbs/15_callback.hook.ipynb 50

/usr/local/lib/python3.10/dist-packages/fastai/callback/hook.py in num_features_model(m)
     94         #Trying for a few sizes in case the model requires a big input size.
     95         try:
---> 96             return model_sizes(m, (sz,sz))[-1][1]
     97         except Exception as e:
     98             sz *= 2

/usr/local/lib/python3.10/dist-packages/fastai/callback/hook.py in model_sizes(m, size)
     84     "Pass a dummy input through the model `m` to get the various sizes of activations."
     85     with hook_outputs(m) as hooks:
---> 86         _ = dummy_eval(m, size=size)
     87         return [o.stored.shape for o in hooks]
     88 

/usr/local/lib/python3.10/dist-packages/fastai/callback/hook.py in dummy_eval(m, size)
     78     ch_in = in_channels(m)
     79     x = one_param(m).new(1, ch_in, *size).requires_grad_(False).uniform_(-1.,1.)
---> 80     with torch.no_grad(): return m.eval()(x)
     81 
     82 # %% ../../nbs/15_callback.hook.ipynb 44

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
   1516             return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517         else:
-> 1518             return self._call_impl(*args, **kwargs)
   1519 
   1520     def _call_impl(self, *args, **kwargs):

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
   1525                 or _global_backward_pre_hooks or _global_backward_hooks
   1526                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527             return forward_call(*args, **kwargs)
   1528 
   1529         try:

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/container.py in forward(self, input)
    213     def forward(self, input):
    214         for module in self:
--> 215             input = module(input)
    216         return input
    217 

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
   1516             return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517         else:
-> 1518             return self._call_impl(*args, **kwargs)
   1519 
   1520     def _call_impl(self, *args, **kwargs):

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
   1566                 args = bw_hook.setup_input_hook(args)
   1567 
-> 1568             result = forward_call(*args, **kwargs)
   1569             if _global_forward_hooks or self._forward_hooks:
   1570                 for hook_id, hook in (

/usr/local/lib/python3.10/dist-packages/torchvision/models/inception.py in forward(self, x)
    314 
    315     def forward(self, x: Tensor) -> Tensor:
--> 316         outputs = self._forward(x)
    317         return torch.cat(outputs, 1)
    318 

/usr/local/lib/python3.10/dist-packages/torchvision/models/inception.py in _forward(self, x)
    301 
    302     def _forward(self, x: Tensor) -> List[Tensor]:
--> 303         branch3x3 = self.branch3x3_1(x)
    304         branch3x3 = self.branch3x3_2(branch3x3)
    305 

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
   1516             return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517         else:
-> 1518             return self._call_impl(*args, **kwargs)
   1519 
   1520     def _call_impl(self, *args, **kwargs):

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
   1525                 or _global_backward_pre_hooks or _global_backward_hooks
   1526                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527             return forward_call(*args, **kwargs)
   1528 
   1529         try:

/usr/local/lib/python3.10/dist-packages/torchvision/models/inception.py in forward(self, x)
    403 
    404     def forward(self, x: Tensor) -> Tensor:
--> 405         x = self.conv(x)
    406         x = self.bn(x)
    407         return F.relu(x, inplace=True)

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
   1516             return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517         else:
-> 1518             return self._call_impl(*args, **kwargs)
   1519 
   1520     def _call_impl(self, *args, **kwargs):

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
   1525                 or _global_backward_pre_hooks or _global_backward_hooks
   1526                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527             return forward_call(*args, **kwargs)
   1528 
   1529         try:

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py in forward(self, input)
    458 
    459     def forward(self, input: Tensor) -> Tensor:
--> 460         return self._conv_forward(input, self.weight, self.bias)
    461 
    462 class Conv3d(_ConvNd):

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
    454                             weight, bias, self.stride,
    455                             _pair(0), self.dilation, self.groups)
--> 456         return F.conv2d(input, weight, bias, self.stride,
    457                         self.padding, self.dilation, self.groups)
    458 

RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [1, 1000]

Suggestion I got from Colab -

The inception_v3 model expects input of size 3 channels, 224x224 pixels. The code is passing data of size 1x1000.
To fix the issue, the input data should be resized to 3x224x224.

inception_v3 expects an input shape of 299x299 and will otherwise fail:

model = models.inception_v3()
x = torch.randn(1, 3, 224, 224)
out = model(x)
# RuntimeError: Calculated padded input size per channel: (3 x 3). Kernel size: (5 x 5). Kernel size can't be greater than actual input size

x = torch.randn(1, 3, 299, 299)
out = model(x)

Seems to be the case, but I don’t see why it’s the case as I’m also not deeply familiar with fastai.

Greetings,
I am new to PyTorch and would be glad if you could help me. I receive this error and cannot solve it. I am fine-tuning resnet50 with the TID2013 dataset in order to estimate the quality of uploaded images. My code is as follows:
class TID2013Dataset(Dataset):
    def __init__(self, transform=None):
        self.root_dir = '/content/distorted_images'
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),  # Resize the images to 224x224 pixels
            transforms.ToTensor(),  # Convert the images to PyTorch tensors
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize the pixel values
        ])
        with open('/content/mos.txt', 'r') as f:
            self.mos_scores = [torch.tensor(float(line.strip()), dtype=torch.float32) for line in f]

    def __len__(self):
        return len(self.mos_scores)

    def __getitem__(self, idx):
        base_img_name = f"{idx//120+1:02}_{(idx//5)%24+1:02}_{idx%5+1}"
        img_name = os.path.join(self.root_dir, "i" + base_img_name + ".bmp")
        if not os.path.exists(img_name):
            img_name = os.path.join(self.root_dir, "I" + base_img_name + ".bmp")
        if not os.path.exists(img_name):
            img_name = os.path.join(self.root_dir, "i" + base_img_name + ".BMP")
        if not os.path.exists(img_name):
            img_name = os.path.join(self.root_dir, "I" + base_img_name + ".BMP")
        image = Image.open(img_name).convert('RGB')
        mos_score = self.mos_scores[idx]

        if self.transform:
            image = self.transform(image)

        return image, mos_score

class LitModule(pl.LightningModule):
    def __init__(self, batch_size, learning_rate):
        super().__init__()
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.ssim_loss_weight = 0.4
        self.ms_ssim_loss_weight = 0.2
        self.perceptual_loss_weight = 0.4
        self.mae = torchmetrics.MeanAbsoluteError()
        self.rmse = torchmetrics.MeanSquaredError()
        backbone = models.resnet50(pretrained=True)
        num_filters = backbone.fc.in_features
        layers = list(backbone.children())[:-1]
        self.feature_extractor = nn.Sequential(*layers)
        self.classifier = nn.Linear(num_filters, 1)
        self.vgg = vgg16(pretrained=True).features[:16]
        for param in self.vgg.parameters():
            param.requires_grad = False

    def forward(self, x):
        representations = self.feature_extractor(x).view(x.size(0), -1)
        x = self.classifier(representations)
        return x

    def calculate_loss(self, label, output):
        # SSIM Loss
        ssim_loss = torch.mean(1 - SSIM(data_range=255, size_average=True)(label, output))

        # MS-SSIM Loss
        ms_ssim_loss = torch.mean(1 - ms_ssim(label, output, data_range=255, size_average=True))

        # Perceptual Loss
        perceptual_loss = F.mse_loss(self.vgg(output), self.vgg(label))

        loss = (self.perceptual_loss_weight * perceptual_loss)
        return loss

    def setup(self, stage=None):
        train_lenght = int(len(dataset)*0.8)
        test_lenght = len(dataset) - train_lenght
        self.train_dataset, self.test_dataset = random_split(dataset, [train_lenght, test_lenght])

    def train_dataloader(self):
        return torch.utils.data.DataLoader(dataset=self.train_dataset, shuffle=True, batch_size=32)

    def val_dataloader(self):
        return torch.utils.data.DataLoader(dataset=self.test_dataset, shuffle=False, batch_size=32)

    def training_step(self, batch, batch_idx):
        data, label = batch
        output = self.forward(data)
        output = output.squeeze()
        loss = self.calculate_loss(label, output)
        mae = self.mae(label, output)
        rmse = torch.sqrt(self.rmse(label, output))
        self.log('train_mae', mae, on_step=True, on_epoch=True)
        self.log('train_rmse', rmse, on_step=True, on_epoch=True)
        return {'loss': loss}

    def validation_step(self, batch, batch_idx):
        val_data, val_label = batch
        val_output = self.forward(val_data)
        val_output = val_output.squeeze()
        loss = self.calculate_loss(val_label, val_output)
        mae = self.mae(val_label, val_output)
        rmse = torch.sqrt(self.rmse(val_label, val_output))
        self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log('val_mae', mae, on_step=True, on_epoch=True)
        self.log('val_rmse', rmse, on_step=True, on_epoch=True)
        return {'val_loss': loss}

    def on_validation_epoch_end(self):
        self.log("val_mae_epoch", self.mae.compute(), prog_bar=True)
        self.log("val_rmse_epoch", torch.sqrt(self.rmse.compute()), prog_bar=True)
        self.mae.reset()
        self.rmse.reset()

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

Your code is not properly formatted and thus also not executable. It’s also unclear what is failing, so please post a minimal and executable code snippet reproducing the issue so we can debug it.

Oh, sorry for that, and thank you for your time. The clean version from the Colab notebook is as follows:

!pip install lightning --quiet
!pip install pytorch-msssim

import os
import torch
import torchmetrics
from torchvision import transforms
import torchvision.models as models
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, BatchSizeFinder, LearningRateFinder, EarlyStopping
from torch.utils.data import DataLoader, random_split, Dataset
from torch.optim import Adam
from torch import nn
import torch.nn.functional as F
from PIL import Image
from pytorch_msssim import ssim, ms_ssim, SSIM, MS_SSIM
from torchvision.models import vgg16

transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

!wget https://www.ponomarenko.info/tid2013/tid2013.rar
!unrar x /content/tid2013.rar

class TID2013Dataset(Dataset):
    def __init__(self, transform=None):
        self.root_dir = '/content/distorted_images'
        self.transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize the images to 224x224 pixels
        transforms.ToTensor(),  # Convert the images to PyTorch tensors
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize the pixel values
    ])
        with open('/content/mos.txt', 'r') as f:
            self.mos_scores = [torch.tensor(float(line.strip()), dtype=torch.float32) for line in f]

    def __len__(self):
        return len(self.mos_scores)

    def __getitem__(self, idx):
        base_img_name = f"{idx//120+1:02}_{(idx//5)%24+1:02}_{idx%5+1}"
        img_name = os.path.join(self.root_dir, "i" + base_img_name + ".bmp")
        if not os.path.exists(img_name):
            img_name = os.path.join(self.root_dir, "I" + base_img_name + ".bmp")
        if not os.path.exists(img_name):
            img_name = os.path.join(self.root_dir, "i" + base_img_name + ".BMP")
        if not os.path.exists(img_name):
            img_name = os.path.join(self.root_dir, "I" + base_img_name + ".BMP")
        image = Image.open(img_name).convert('RGB')
        mos_score = self.mos_scores[idx]

        if self.transform:
            image = self.transform(image)

        return image, mos_score

dataset = TID2013Dataset(transform = transform)

class LitModule(pl.LightningModule):
  def __init__(self, batch_size, learning_rate):
    super().__init__()
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.ssim_loss_weight = 0.4
    self.ms_ssim_loss_weight = 0.2
    self.perceptual_loss_weight = 0.4
    self.mae = torchmetrics.MeanAbsoluteError()
    self.rmse = torchmetrics.MeanSquaredError()
    backbone = models.resnet50(pretrained = True)
    num_filters = backbone.fc.in_features
    layers = list(backbone.children())[:-1]
    self.feature_extractor = nn.Sequential(*layers)
    self.classifier = nn.Linear(num_filters, 1)
    self.vgg = vgg16(pretrained=True).features[:16]
    for param in self.vgg.parameters():
        param.requires_grad = False

  def forward(self,x):
    representations = self.feature_extractor(x).view(x.size(0), -1)
    x = self.classifier(representations)
    return x

  def calculate_loss(self, label, output):
    # SSIM Loss
    ssim_loss = torch.mean(1 - SSIM(data_range=255, size_average=True)(label, output))

    # MS-SSIM Loss
    ms_ssim_loss = torch.mean(1 - ms_ssim(label, output, data_range=255, size_average=True))

    # Perceptual Loss
    perceptual_loss = F.mse_loss(self.vgg(output), self.vgg(label))

    loss = (self.perceptual_loss_weight * perceptual_loss)
    return loss

  def setup(self, stage = None):
    train_lenght = int(len(dataset)*0.8)
    test_lenght = len(dataset) - train_lenght
    self.train_dataset, self.test_dataset = random_split(dataset, [train_lenght, test_lenght])

  def train_dataloader(self):
    return torch.utils.data.DataLoader(dataset = self.train_dataset ,shuffle = True, batch_size = 32)

  def val_dataloader(self):
    return torch.utils.data.DataLoader(dataset = self.test_dataset, shuffle = False, batch_size = 32)

  def training_step(self, batch,batch_idx):
    data, label = batch
    output = self.forward(data)
    output = output.squeeze()
    loss = self.calculate_loss(label, output)
    mae = self.mae(label, output)
    rmse = torch.sqrt(self.rmse(label, output))
    self.log('train_mae', mae, on_step=True, on_epoch=True)
    self.log('train_rmse', rmse, on_step=True, on_epoch=True)
    return{'loss': loss}

  def validation_step(self, batch, batch_idx):
    val_data, val_label = batch
    val_output = self.forward(val_data)
    val_output = val_output.squeeze()
    loss = self.calculate_loss(val_label, val_output)
    mae = self.mae(val_label, val_output)
    rmse = torch.sqrt(self.rmse(val_label, val_output))
    self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
    self.log('val_mae', mae, on_step=True, on_epoch=True)
    self.log('val_rmse', rmse, on_step=True, on_epoch=True)
    return {'val_loss': loss}

  def on_validation_epoch_end(self):
    self.log("val_mae_epoch", self.mae.compute(), prog_bar=True)
    self.log("val_rmse_epoch", torch.sqrt(self.rmse.compute()), prog_bar=True)
    self.mae.reset()
    self.rmse.reset()

  def configure_optimizers(self):
    return torch.optim.Adam(self.parameters(), lr = self.learning_rate)

early_stop = EarlyStopping(monitor = 'val_loss_epoch', patience = 5, strict = False, verbose = False, mode = 'min')

checkpoint_callback = ModelCheckpoint(monitor = 'val_loss_epoch', dirpath = '/models', filename = 'sample - {epoch:02d} - {val_loss:.2f}', save_top_k = 3, mode = 'min')

model = LitModule(batch_size = 32, learning_rate = 0.001)
trainer = pl.Trainer(callbacks = [checkpoint_callback, BatchSizeFinder(), LearningRateFinder()], accelerator = 'gpu', max_epochs = 200)

trainer.fit(model)

Your code works fine using:

batch_size = 10
model = LitModule(batch_size=batch_size, learning_rate=1.)
x = torch.randn(batch_size, 3, 224, 224)
out = model(x)
print(out.shape)
# torch.Size([10, 1])

so I guess the error is raised in another part of the code, i.e. not the forward method?
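
One candidate, under that guess, is calculate_loss: it passes the squeezed model output (a 1D tensor of scores) through self.vgg, which is a convolutional stack. A minimal sketch of that failure mode, not a confirmed diagnosis:

import torch
from torchvision.models import vgg16

features = vgg16(weights=None).features[:16]  # the convolutional part used as self.vgg
scores = torch.randn(10)                      # squeezed [batch_size] output of the regression head
# features(scores)  # raises: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [10]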

Hi there,
sorry to bother you on this topic again, but I just can’t figure this out. I am attempting transfer learning with Inception V3 using grayscale images. I get the error:
RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [8, 1000]
8 is my batch size, but I have no idea where the 1000 comes from; I have approximately 200 images and roughly 160 of them are used for training.

The image below shows the architecture of my CNN.

The below code contains the transforms I have used:

transform1 = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Ensure images are grayscale
    transforms.Resize((299, 299)),                # Resize images to 299x299
    transforms.ToTensor(),                        # Convert images to tensor
    transforms.Normalize((0.5,), (0.5,)),         # Normalize images with mean and std of 0.5
    transforms.RandomHorizontalFlip(p=0.5)        # Randomly flip the image horizontally
])

The error kicks in during training when I pass the images to the model
—> outputs = model(images)

When I test the shape of 'images' I get torch.Size([8, 1, 299, 299]), which seems to be the correct format to me. Any advice would be much appreciated, please. :)
Kind regards,
Tim

The error is raised by using self.AuxLogits in your flattened nn.Sequential container while the original implementation does not just use this module in a sequential way as seen here.
You could disable this module via aux_logits=False and it should fix the error.
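
A minimal sketch of how that happens, assuming the model was built roughly as nn.Sequential(*inception.children()): the auxiliary head’s final linear layer outputs [batch_size, 1000], which is then fed to the next Inception block’s convolution:

import torch
from torch import nn
from torchvision.models import inception_v3

inception = inception_v3(weights=None)              # aux_logits=True by default
flattened = nn.Sequential(*inception.children())    # AuxLogits ends up in the middle of the stack

x = torch.randn(8, 3, 299, 299)
# flattened(x)  # raises: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [8, 1000]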

You won’t be able to load the weights while also disabling the aux_logits at the same time.
The proper approach would be to avoid flattening all children into a flattened nn.Sequential container and to derive a custom architecture achieving what you want in your use case. E.g. if you just want to replace a few modules, replace these directly in your model instead of creating the nn.Sequential approach.
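
A minimal sketch of that direct-replacement idea, assuming the goal is simply a new classification head on a pretrained inception_v3 (num_classes is a placeholder):

import torch
from torch import nn
from torchvision.models import inception_v3, Inception_V3_Weights

num_classes = 2  # placeholder
model = inception_v3(weights=Inception_V3_Weights.DEFAULT)

# replace the heads in place instead of flattening the children into nn.Sequential
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.AuxLogits.fc = nn.Linear(model.AuxLogits.fc.in_features, num_classes)

model.eval()                     # in eval mode only the main logits are returned
x = torch.randn(2, 3, 299, 299)
out = model(x)
print(out.shape)
# torch.Size([2, 2])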

I have changed my code to the version below, and with some additional input reshaping that has resolved the issue.

class CustomInceptionCNN(nn.Module):
    def __init__(self):
        super(CustomInceptionCNN, self).__init__()

        # Load the pretrained Inception V3 model
        weights = Inception_V3_Weights.DEFAULT
        inception = inception_v3(weights=weights)  #, aux_logits=False)

        inception.aux_logits = False
        inception.AuxLogits = None

Thank you very much for the help!

I am still trying to understand the aux_logits solution. Does setting this to False disable/remove the auxiliary classifier?
I understand you said I should not flatten all the children, but does the [:,-2] not ensure that the aux classifier and fully connected layer are removed so it is only the other children that are flattened?

Hi, I’ve been looking through this thread and it seems I haven’t found my answer.

I’ve got the same error as the others.

Base info

Input :

  • TIF images (RGB), downloaded

Layers :

  • Some conv2d

I’m not sure how to get the correct 3D or 4D input from images…

Model

class RemoteSensingModel(torch.nn.Module):
    def __init__(self,n_neurons = 20) -> None:
        super(RemoteSensingModel, self).__init__()

        # Define a list for each layers {layer, activation function}
        self.layers = []
        self.activations = []

        # Define the activation functions
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()

        # Define the layers
        self.layersInitialization(n_neurons)

        self.classif_layer = torch.nn.Linear(n_neurons, AMOUNT_OF_CLASSES, bias=True)

    def forward(self, x):
        # Compute the forward pass step by step
        # It should return the probability of class 1
        for layer, activation in zip(self.layers, self.activations):
            x = layer(x)
            x = activation(x)

        x = self.classif_layer(x)
        y = self.sigmoid(x)

        # Classify to the classes 
        # y = torch.zeros(x.shape[0], AMOUNT_OF_CLASSES)
        # for i in range(x.shape[0]):
        #     for j in range(AMOUNT_OF_CLASSES):
        #         if x[i][j] > 0.5:
        #             y[i][j] = 1
        #             break
                
        return y

    # Defines all layers, need to be completed, too little
    def layersInitialization(self, n_neurons):
        
        # number_of_steps = IMAGE_WIDTH // 2
        # for i in range(number_of_steps):
        # Convolution layer
        self.layers.append(torch.nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1, bias=True))
        self.activations.append(self.relu)
        # Max pooling layer
        self.layers.append(torch.nn.MaxPool2d(kernel_size=2, stride=2))

        
        # Convolution layer
        self.layers.append(torch.nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=True))
        self.activations.append(self.relu)
        # Max pooling layer
        self.layers.append(torch.nn.MaxPool2d(kernel_size=2, stride=2))

        # Flatten layer
        self.layers.append(torch.nn.Flatten())

        # Fully connected layer
        self.layers.append(torch.nn.Linear(128 * 64 * 64, n_neurons))
        self.activations.append(self.relu)

Dataset class


def remove_numbers(input_string):
    result = re.sub(r'\d+', '', input_string)
    return result

def split_line(line):
    parts = line.strip().split('\t')
    return parts

class ImageDataset(Dataset):
    def __init__(self, images_path, labels_path):
        self.images_path = images_path
        self.labels_path = labels_path
        self.images = []
        self.labels = []
        self.labels_classes = []
        self.load_data()

    def __len__(self):
        return len(self.images)

    # Returns a tensor compatible conv layer
    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        return image, label

    def getImage(self, idx):
        image_base = self.images[idx]
        label = self.labels[idx]
        image = np.array(image_base).flatten() / 255.0

        return image, label

    def load_data(self):
        with open(self.labels_path, 'r') as f:
            lines = f.readlines()
            isFirst = True
            for line in lines:
                if isFirst:
                    isFirst = False
                    parts = split_line(line)
                    self.labels_classes = np.array([x for x in parts[1:]])
                    continue
                parts = split_line(line)
                image_name = parts[0]
                label = np.array([int(x) for x in parts[1:]])
                image_without_number = remove_numbers(image_name)
                image_path = os.path.join(self.images_path, image_without_number,image_name)
                image_path = image_path + ".tif"
                try:
                    image = Image.open(image_path)
                    # Check if the image is the correct size (256x256)
                    if image.size != (256, 256):
                        # raise ValueError(f"Image {image_path} has incorrect size: {image.size}")
                        image = image.resize((256, 256))


                    # image = np.array(image).flatten() / 255.0
                    self.images.append(image)
                    self.labels.append(label)
                except FileNotFoundError:
                    print(f"File not found: {image_path}")

    def get_classes(self):
      return self.labels

    def get_classes_of_image(self, idx):
      classes = []
      for i in range(len(self.labels_classes)):
        if self.labels[idx][i] == 1:
          classes.append(self.labels_classes[i])
      return classes

Training process

# prompt: Create the trainning of the model

# Create the model
model = RemoteSensingModel(300)

# Define the loss function
criterion = torch.nn.BCELoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# Create the dataset
dataset = ImageDataset(IMAGES_PATH, LABELS_PATH)

# Split the dataset into train and test sets
train_size = int(0.8 * len(dataset))
print(f"Train size: {train_size}")
test_size = len(dataset) - train_size

train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])



# Create the dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Train the model
num_epochs = 30
count = 0
count_print_rate = 100
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        count += 1
        data = data.float()
        target = target.float()

        # Forward pass
        output = model(data)
        loss = criterion(output, target)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if count % count_print_rate == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
    scheduler.step()

# Save the model
torch.save(model.state_dict(), 'model.pth')

I don’t know what the exact error message is, but note that you are flattening the image here image = np.array(image_base).flatten() / 255.0 (I don’t know if this method is used at all).
If this method is not used, could you post the actual error message including the failing shape, please?
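
For the non-flattened path, a minimal sketch of turning a PIL image into the [channels, height, width] / [batch_size, channels, height, width] layout nn.Conv2d expects, assuming torchvision is available:

import torch
from PIL import Image
from torchvision import transforms

to_tensor = transforms.ToTensor()       # PIL image -> float32 tensor [C, H, W] scaled to [0, 1]

img = Image.new('RGB', (256, 256))      # stand-in for one of the loaded .tif images
x = to_tensor(img)
print(x.shape)
# torch.Size([3, 256, 256])
print(x.unsqueeze(0).shape)             # batched
# torch.Size([1, 3, 256, 256])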

Hi,
I’ve found a way; I had some issues converting the data from a NumPy array to a tensor.
Here is the code I had to use:


def image_to_tensor(image):
    image = np.array(image).flatten() / 255.0   # uint8 pixels -> flat float array scaled to [0, 1]
    image = image.reshape(256, 256, 3)          # back to H x W x C
    image = image.transpose((2, 0, 1))          # to C x H x W, as nn.Conv2d expects
    image_tensor = torch.from_numpy(image)
    return image_tensor
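
For completeness, a hedged usage sketch of the helper above: torch.from_numpy keeps the float64 dtype produced by the division, so a .float() cast is needed before passing the tensor to float32 conv layers (the training loop above already does data = data.float()):

import torch
from PIL import Image

img = Image.new('RGB', (256, 256))      # stand-in for a loaded .tif image
x = image_to_tensor(img).float()        # cast float64 -> float32 for the conv layers
print(x.shape, x.dtype)
# torch.Size([3, 256, 256]) torch.float32
batch = x.unsqueeze(0)                  # [1, 3, 256, 256] for a single-image forward pass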