RuntimeError: Given groups=1, weight of size [16, 3, 3, 3, 3], expected input[2, 128, 128, 128, 3] to have 3 channels, but got 128 channels instead

I am getting an error.

RuntimeError: Given groups=1, weight of size [16, 3, 3, 3, 3], expected input[2, 128, 128, 128, 3] to have 3 channels, but got 128 channels instead

Below is my code for a convolutional autoencoder for 3D images:

class Convo3DAE(nn.Module):

  def __init__(self):
    super(Convo3DAE, self).__init__()
    # Encoder
    self.encoder= nn.Sequential()
    self.encoder.add_module('C1', nn.Conv3d(in_channels=3, out_channels=16, kernel_size=3,stride=2 ))
    self.encoder.add_module('Batch Norma 3d', nn.BatchNorm3d(num_features = 16))
    self.encoder.add_module('relu1', nn.ReLU(True))
    self.encoder.add_module('C2', nn.Conv3d(in_channels=16, out_channels=32, kernel_size=3,stride=2 ))
    self.encoder.add_module('relu2', nn.ReLU(True))
    self.encoder.add_module('C3',nn.Conv3d(in_channels=32, out_channels=64, kernel_size=3,stride=2 ))
    self.encoder.add_module('relu3', nn.ReLU(True))
    self.encoder.add_module('C4',nn.Conv3d(in_channels=64, out_channels=128, kernel_size=3,stride=2 ))


    # Decoder
    self.decoder= nn.Sequential()
    self.decoder.add_module('tC1',nn.ConvTranspose3d(in_channels=128, out_channels=64, kernel_size=3,stride=2 ))
    self.encoder.add_module('Batch Norma 3d', nn.BatchNorm3d(num_features = 64))
    self.encoder.add_module('relu1', nn.ReLU(True))
    self.encoder.add_module('tC2', nn.Conv3d(in_channels=64, out_channels=32, kernel_size=3,stride=2 ))
    self.encoder.add_module('relu2', nn.ReLU(True))
    self.encoder.add_module('tC3', nn.Conv3d(in_channels=32, out_channels=16, kernel_size=3,stride=2 ))
    self.encoder.add_module('relu3', nn.ReLU(True))
    self.encoder.add_module('C2', nn.Conv3d(in_channels=16, out_channels=3, kernel_size=3,stride=2 ))




  def forward(self, x):
    encoded = self.encoder(x)
    decoded = self.decoder(encoded)
    return decoded

This is the data loader:

def load_img(img_dir, img_list):
  images=[]
  for i, image_name in enumerate(img_list):
    if (image_name.split('.')[1] == 'npy'):

        image = np.load(img_dir+image_name)

        images.append(image)
  images = np.array(images)

  return(images)

def imageLoader(img_dir, img_list, mask_dir, mask_list, batch_size):

  L = len(img_list)

#keras needs the generator infinite, so we will use while true
  while True:

    batch_start = 0
    batch_end = batch_size

    while batch_start < L:
        limit = min(batch_end, L)

        X = load_img(img_dir, img_list[batch_start:limit])
        Y = load_img(mask_dir, mask_list[batch_start:limit])

        yield (X,Y) #a tuple with two numpy arrays with batch_size samples

        batch_start += batch_size
        batch_end += batch_size

#loop Ends here

batch_size = 2

train_img_datagen = imageLoader(train_img_dir, train_img_list,
                            train_mask_dir, train_mask_list, batch_size)

val_img_datagen = imageLoader(val_img_dir, val_img_list,
                            val_mask_dir, val_mask_list, batch_size)

This is the training loop:

def train_batch(data1, model, criterion, optimizer):
  model.train()
  data = data1
  optimizer.zero_grad()
  output = model(data)
  loss = criterion(output, data)
  loss.backward()
  optimizer.step()
  return loss  # return the loss tensor; the caller extracts the scalar with .item()

@torch.inference_mode()  # disable gradient tracking during evaluation
def eval_batch(data, model, criterion):
  model.eval()
  data = data.to(device)
  output = model(data)
  loss = criterion(output, data)
  return loss

np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

n_epochs = 10

training_loss, test_loss = [], []

for epoch in range(n_epochs):
  training_losses, test_losses = [], []

  for data, _ in train_img_datagen:
    if type(data) is np.ndarray:
      data = torch.from_numpy(data)
    trng_batch_loss = train_batch(data, model, criterion, optimizer)
    training_losses.append(trng_batch_loss.item())
  training_per_epoch_loss = np.array(training_losses).mean()

  for data, _ in val_img_datagen:
    tst_batch_loss = eval_batch(data, model, criterion)
    test_losses.append(tst_batch_loss.item())
  test_per_epoch_loss = np.array(test_losses).mean()

  training_loss.append(training_per_epoch_loss)
  test_loss.append(test_per_epoch_loss)

  if (epoch+1) % 10 == 0:
    print(f'Epoch: {epoch+1}/{n_epochs}\t| Training loss: {training_per_epoch_loss:.4f} |   ', end='')
    print(f'Test loss: {test_per_epoch_loss:.4f}')

Below is the error:

RuntimeError                              Traceback (most recent call last)
<ipython-input-29-f2ea5e4ffbdc> in <cell line: 9>()
     13         if type(data) is np.ndarray:
     14             data = torch.from_numpy(data)
---> 15         trng_batch_loss = train_batch(data, model, criterion, optimizer)
     16         training_losses.append(trng_batch_loss.item())
     17     training_per_epoch_loss = np.array(training_losses).mean()


/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
     603                 self.groups,
     604             )
--> 605         return F.conv3d(
    606             input, weight, bias, self.stride, self.padding, self.dilation, self.groups
    607         )

RuntimeError: Given groups=1, weight of size [16, 3, 3, 3, 3], expected input[2, 128, 128, 128, 3] to have 3 channels, but got 128 channels instead

You are passing your input in the channels-last memory layout, while PyTorch expects channels-first input for nn.Conv3d. .permute() the tensor to [batch_size, channels, depth, height, width] via:

x = x.permute(0, 4, 1, 2, 3).contiguous()

and it should work.
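For example, a minimal sketch with a dummy tensor of the same shape as your batch (assuming the .npy files store each volume with channels in the last axis, as the error message indicates):

import torch

x = torch.randn(2, 128, 128, 128, 3)       # [N, D, H, W, C], as loaded from the .npy files
x = x.permute(0, 4, 1, 2, 3).contiguous()  # -> [N, C, D, H, W]
print(x.shape)                             # torch.Size([2, 3, 128, 128, 128])

The same permute can be applied to the batch right after torch.from_numpy(data) in your training loop.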

I am now getting a new error:

RuntimeError: expected scalar type Double but found Float

Changed code:

np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

n_epochs = 100

training_loss, test_loss = [], []

for epoch in range(n_epochs):
  training_losses, test_losses = [], []

  for data, _ in train_img_datagen:
    if type(data) is np.ndarray:
        data = torch.from_numpy(data)
        x = data.permute(0, 4, 1, 2, 3).contiguous()
    trng_batch_loss = train_batch(x, model, criterion, optimizer)
    training_losses.append(trng_batch_loss.item())
  training_per_epoch_loss = np.array(training_losses).mean()

  for data, _ in val_img_datagen:
    tst_batch_loss = eval_batch(x, model, criterion)
    test_losses.append(tst_batch_loss.item())
  test_per_epoch_loss = np.array(test_losses).mean()

  training_loss.append(training_per_epoch_loss)
  test_loss.append(test_per_epoch_loss)

  if (epoch+1) % 10==0:
    print(f'Epoch: {epoch+1}/{n_epochs}\t| Training loss: {training_per_epoch_loss:.4f} |   ', end='')
    print(f'Test loss: {test_per_epoch_loss:.4f}')

Your inputs and the model parameters have different dtypes (Double vs. Float). Convert them to the same dtype, e.g. by calling .float() on the input tensor, and reassign the result when converting a tensor, since the conversion is not in-place.
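A minimal sketch of the cast inside your training loop (assuming the loaded arrays are the float64/Double side of the mismatch):

data = torch.from_numpy(data).float()        # cast float64 (Double) -> float32 (Float)
x = data.permute(0, 4, 1, 2, 3).contiguous()
output = model(x)                            # input dtype now matches the float32 model weights

Alternatively, model.double() would make the parameters match the inputs instead, at the cost of roughly doubling memory and compute.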

class Convo3DAE(nn.Module):
  def __init__(self):
    super(Convo3DAE, self).__init__()
    # Encoder
    self.encoder = nn.Sequential(
        nn.Conv3d(in_channels=3, out_channels=16, kernel_size=3, stride=2),
        nn.BatchNorm3d(num_features=16),
        nn.ReLU(True),
        nn.Conv3d(in_channels=16, out_channels=32, kernel_size=3, stride=2),
        nn.ReLU(True),
        nn.Conv3d(in_channels=32, out_channels=64, kernel_size=3, stride=2),
        nn.ReLU(True),
        nn.Conv3d(in_channels=64, out_channels=128, kernel_size=3, stride=2)
    )

    # Decoder
    self.decoder = nn.Sequential(
        nn.ConvTranspose3d(in_channels=128, out_channels=64, kernel_size=3, stride=2),
        nn.BatchNorm3d(num_features=64),  # Corrected BatchNorm placement
        nn.ReLU(True),
        nn.ConvTranspose3d(in_channels=64, out_channels=32, kernel_size=3, stride=2),
        nn.ReLU(True),
        nn.ConvTranspose3d(in_channels=32, out_channels=16, kernel_size=3, stride=2),
        nn.ReLU(True),
        nn.ConvTranspose3d(in_channels=16, out_channels=3, kernel_size=3, stride=2)
    )

  def forward(self, x):
    encoded = self.encoder(x)
    decoded = self.decoder(encoded)
    return decoded
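A quick way to sanity-check the model is to push a dummy batch through it and compare the output shape against the input (a sketch, assuming 3-channel 128x128x128 volumes and the Convo3DAE class defined above):

model = Convo3DAE()
x = torch.randn(2, 3, 128, 128, 128)
out = model(x)
print(out.shape)  # torch.Size([2, 3, 127, 127, 127]) with kernel_size=3, stride=2 and no padding/output_padding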

I am getting an error:

RuntimeError: Predictions and targets are expected to have the same shape, but got torch.Size([2, 3, 127, 127, 127]) and torch.Size([2, 3, 128, 128, 128]).