Given groups=1, weight of size [64, 1, 3, 3], expected input[1, 19, 304, 304] to have 1 channels, but got 19 channels instead

Hello everyone, when I try to run my code I get this error message:

Given groups=1, weight of size [64, 1, 3, 3], expected input[1, 19, 304, 304] to have 1 channels, but got 19 channels instead

I asked my teacher and she said I should write some code using permute, but I couldn't figure it out. Can anyone help me? Here is my code.

class CustomDataset(Dataset): 
  def __init__(self, image_paths, target_paths, transform, train=True): # initial logic happens like transform 
      #self.image_paths = image_paths
      self.target_paths = target_paths 
      self.transforms = transforms.ToTensor()

  def __getitem__(self, index):

      #image = Image.open(self.image_paths[index]) 
      mask = np.load(self.target_paths[index]) 
      t_image = self.transforms(mask) 
            
      return t_image

  def __len__(self): # return count of sample we have 
    
      return len(self.target_paths)

image_paths = glob.glob("/content/drive/My Drive/Colab Notebooks/CustomDataset/SplitTrainingDataset5_0/test/images/*npy")
target_paths = glob.glob("/content/drive/My Drive/Colab Notebooks/CustomDataset/SplitTrainingDataset5_0/test/1st_manual/*npy")
len_data = len(target_paths)
#print(target_paths)
train_size = 0.6
train_image_paths = image_paths[:int(len_data*train_size)]
train_mask_paths = target_paths[:int(len_data*train_size)]
test_mask_paths = target_paths[int(len_data*train_size):]
test_image_paths = image_paths[int(len_data*train_size):]
transform = transforms.Compose(
            [transforms.Resize([100, 100,]),
            transforms.ToTensor()])


transform_images = ['Numpy_AS_1.npy', 'Numpy_AS_2.npy','Numpy_AS_3.npy','Numpy_AS_4.npy',
                    ]  # apply special transformation only on these images

transform_masks = ['Numpy_AS_GT_1.npy','Numpy_AS_GT_2.npy','Numpy_AS_GT_3.npy','Numpy_AS_GT_4.npy',
                   ] 

train_dataset = CustomDataset(train_image_paths, train_mask_paths, transform=transform, train=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True,  num_workers=0)

test_dataset = CustomDataset(test_image_paths, test_mask_paths, transform=transform, train=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=0)

The model:

class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


class Unflatten(nn.Module):
    def __init__(self, channel, height, width):
        super(Unflatten, self).__init__()
        self.channel = channel
        self.height = height
        self.width = width

    def forward(self, input):
        return input.view(input.size(0), self.channel, self.height, self.width)


class ConvVAE(nn.Module):

    def __init__(self, latent_size):
        super(ConvVAE, self).__init__()

        self.latent_size = latent_size

        self.encoder = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            Flatten(),
            nn.Linear(80000, 1024),
            nn.ReLU()
        )

        # hidden => mu
        self.fc1 = nn.Linear(1024, self.latent_size)

        # hidden => logvar
        self.fc2 = nn.Linear(1024, self.latent_size)

        self.decoder = nn.Sequential(
            nn.Linear(self.latent_size, 1024),
            nn.ReLU(),
            nn.Linear(1024,80000),
            nn.ReLU(),
            Unflatten(128, 25, 25),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 1, kernel_size=3, stride=2, padding=1),
            nn.Sigmoid()
        )

    def encode(self, x):
        h = self.encoder(x)
        mu, logvar = self.fc1(h), self.fc2(h)
        return mu, logvar

    def decode(self, z):
        z = self.decoder(z)
        return z

    def reparameterize(self, mu, logvar):
        if self.training:
            std = torch.exp(0.5 * logvar)
            eps = torch.randn_like(std)
            return eps.mul(std).add_(mu)
        else:
            return mu

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

and the other parts:

def loss_function(recon_x, x, mu, logvar):
    # reconstruction loss
    BCE = F.binary_cross_entropy(recon_x.view(-1, 784), x.view(-1, 784), reduction='sum')

    # KL divergence loss
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return BCE + KLD

def train(epoch, model, train_loader, optimizer, args):
    model.train()
    train_loss = 0

    for batch_idx, (test) in tqdm(enumerate(train_loader), total=len(train_loader), desc='train'):
        test = test.to(device)

        optimizer.zero_grad()
        recon_batch, mu, logvar = model(test)

        loss = loss_function(recon_batch, test, mu, logvar)
        train_loss += loss.item()

        loss.backward()
        optimizer.step()

    train_loss /= len(train_loader.dataset)

    return train_loss

def test(epoch, model, test_loader, writer, args):
    model.eval()
    test_loss = 0

    with torch.no_grad():
        for batch_idx, (test) in tqdm(enumerate(test_loader), total=len(test_loader), desc='test'):
            test = test.to(device)

            recon_batch, mu, logvar = model(test)

            test_loss += loss_function(recon_batch, test, mu, logvar).item()

            if batch_idx == 0:
                n = min(test.size(0), 8)
                comparison = torch.cat([test[:n], recon_batch.view(args.batch_size, 1, 352, 352)[:n]]).cpu()
                img = make_grid(comparison)
                writer.add_image('reconstruction', img, epoch)
                save_image(comparison.cpu(), 'results/reconstruction_' + str(epoch) + '.png', nrow=n)

    test_loss /= len(test_loader.dataset)

    return test_loss

def save_checkpoint(state, is_best, outdir='results'):
    checkpoint_file = os.path.join(outdir, 'checkpoint.pth')
    best_file = os.path.join(outdir, 'model_best.pth')
    torch.save(state, checkpoint_file)
    if is_best:
        shutil.copyfile(checkpoint_file, best_file)

def main():
    parser = argparse.ArgumentParser(description='Convolutional VAE MNIST Example')
    parser.add_argument('--result_dir', type=str, default='results', metavar='DIR',
                        help='output directory')
    parser.add_argument('--batch_size', type=int, default=100, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: None)')

    # model options
    parser.add_argument('--latent_size', type=int, default=32, metavar='N',
                        help='latent vector size of encoder')
    parser.add_argument("-f", "--fff", help="a dummy argument to fool ipython", default="1")
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}

    train_dataset = CustomDataset(train_image_paths, train_mask_paths, transform=transform, train=True)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=True,  num_workers=0)

    test_dataset = CustomDataset(test_image_paths, test_mask_paths, transform=transform, train=False)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

    model = ConvVAE(args.latent_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    start_epoch = 0
    best_test_loss = np.finfo('f').max

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print('=> loading checkpoint %s' % args.resume)
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch'] + 1
            best_test_loss = checkpoint['best_test_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print('=> loaded checkpoint %s' % args.resume)
        else:
            print('=> no checkpoint found at %s' % args.resume)

    writer = SummaryWriter()

    for epoch in range(start_epoch, args.epochs):
        train_loss = train(epoch, model, train_loader, optimizer, args)
        test_loss = test(epoch, model, test_loader, writer, args)

        # logging
        writer.add_scalar('train/loss', train_loss, epoch)
        writer.add_scalar('test/loss', test_loss, epoch)

        print('Epoch [%d/%d] loss: %.3f val_loss: %.3f' % (epoch + 1, args.epochs, train_loss, test_loss))

        is_best = test_loss < best_test_loss
        best_test_loss = min(test_loss, best_test_loss)
        save_checkpoint({
            'epoch': epoch,
            'best_test_loss': best_test_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, is_best)

        with torch.no_grad():
            sample = torch.randn(64, 32).to(device)
            sample = model.decode(sample).cpu()
            img = make_grid(sample)
            writer.add_image('sampling', img, epoch)
            save_image(sample.view(64, 1, 28, 28), 'results/sample_' + str(epoch) + '.png')


if __name__ == '__main__':
    main()

From the error message, the input is 304 x 304, but your transforms resize to 100 x 100. Did you miss some code while posting?

Actually no, I just tried to resize the dataset, but it didn't work anyway :grin:
So that could be a second problem, but first I need to fix the number of channels.
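
One thing I noticed about the resize: it looks like CustomDataset.__init__ stores transforms.ToTensor() and never uses the transform argument, so the Compose with Resize([100, 100]) is never applied in __getitem__. That would explain why the input is still 304 x 304. Below is a minimal sketch of one way to resize inside the dataset, assuming the mask loads as an (H, W, C) array of floats or uint8; it uses F.interpolate on the tensor because transforms.Resize does not accept NumPy arrays:

import numpy as np
import torch.nn.functional as F
from torch.utils.data import Dataset
from torchvision import transforms


class CustomDataset(Dataset):
    def __init__(self, image_paths, target_paths, transform, train=True):
        # image_paths / transform / train kept to match your signature, but unused here
        self.target_paths = target_paths
        self.to_tensor = transforms.ToTensor()

    def __getitem__(self, index):
        mask = np.load(self.target_paths[index])             # (H, W, C) NumPy array
        t_image = self.to_tensor(mask)                       # -> (C, H, W) tensor
        # interpolate expects a batch dimension (N, C, H, W), so add one and remove it again
        t_image = F.interpolate(t_image.unsqueeze(0), size=(100, 100),
                                mode='nearest').squeeze(0)   # (C, 100, 100)
        return t_image

    def __len__(self):
        return len(self.target_paths)

mode='nearest' is only a guess that seems safer for mask data (it keeps the values discrete). Note that this only fixes the spatial size; all 19 channels are still there.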

Either way, I see that you are using some kind of mask as the input.
Can you check the dimensions of the data coming out of the DataLoader?
I suspect that the mask data has 19 channels.

The first conv layer expects 1 input channel, as the error message says.
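
A quick check could look something like this (just a sketch, reusing the train_mask_paths and train_loader you already defined; the shapes in the comments are guesses based on the error message):

import numpy as np

# the raw mask straight from disk, before ToTensor
mask = np.load(train_mask_paths[0])
print(mask.shape, mask.dtype)        # guess: (304, 304, 19)

# one batch out of the DataLoader, i.e. exactly what the model receives
batch = next(iter(train_loader))
print(batch.shape)                   # guess: torch.Size([1, 19, 304, 304]) with batch_size=1

# if the 19 entries are really 19 separate single-channel masks stacked
# together, permute can move that axis into the batch dimension, which is
# probably what your teacher meant:
batch = batch.permute(1, 0, 2, 3)    # [1, 19, 304, 304] -> [19, 1, 304, 304]
print(batch.shape)

If the 19 axis means something else (for example a one-hot class encoding per pixel), permute alone won't give you 1 channel; you would either index a single channel or change the first layer to nn.Conv2d(19, 64, ...).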

The dimensions of the data are unfortunately not all the same, which is why I tried to resize them, but it didn't seem to work.