Shape mismatch error

I am facing this error:

ValueError: Input and output must have the same number of spatial dimensions, but got input with spatial dimensions of [256, 256] and output size of torch.Size([256]). Please provide input tensor in (N, C, d1, d2, …,dK) format and output size in (o1, o2, …,oK) format.

This is code for an ink-detection model. What could be the cause of this error?

Here is the code snippet of the SubvolumeDataset as well as the model:

import cv2
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset


class SubvolumeDataset(Dataset):
    def __init__(self, data_paths, mask_paths, label_paths=None, device='cpu', resize_shape=(256, 256)):
        self.data_paths = data_paths
        self.mask_paths = mask_paths
        self.label_paths = label_paths
        self.device = device
        self.resize_shape = resize_shape
        self.subvolume_shape = (1,) + self.resize_shape  # (C, H, W), e.g. (1, 256, 256)

    def __len__(self):
        return len(self.data_paths)

    def __getitem__(self, idx):
        data_path = self.data_paths[idx]
        mask_path = self.get_corresponding_mask_path(data_path)

        # Load the slice and its mask as grayscale images
        data = cv2.imread(data_path, cv2.IMREAD_GRAYSCALE)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE) if mask_path is not None else None

        # Resize data and mask to the specified shape
        data = cv2.resize(data, self.resize_shape)
        mask = cv2.resize(mask, self.resize_shape, interpolation=cv2.INTER_NEAREST) if mask is not None else None

        if self.label_paths:
            label_path = self.get_corresponding_label_path(data_path)
            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
        else:
            label = None

        data_tensor = torch.from_numpy(data).unsqueeze(0).to(self.device).float()
        mask_tensor = (
            torch.from_numpy(mask).unsqueeze(0).to(self.device).float()
            if mask is not None
            # self.subvolume_shape is already (1, H, W), so no extra dim is prepended
            else torch.zeros(self.subvolume_shape).to(self.device)
        )
        label_tensor = torch.from_numpy(label).unsqueeze(0).to(self.device).float() if label is not None else torch.zeros_like(data_tensor)

        # Resize label_tensor to match the size of data_tensor
        if label_tensor is not None and label_tensor.ndim == 3:
            label_tensor = label_tensor.unsqueeze(0)  # Add an extra dimension for batch
            label_tensor = F.interpolate(label_tensor, size=data_tensor.shape[2:], mode='nearest')
            label_tensor = label_tensor.squeeze(0)  # Remove the extra dimension

        return data_tensor, mask_tensor, label_tensor

    def get_corresponding_mask_path(self, data_path):
        # Generate the corresponding mask path
        mask_path = data_path.replace('surface_volume', 'mask')
        return mask_path

    def get_corresponding_label_path(self, data_path):
        # Generate the corresponding label path
        label_path = data_path.replace('surface_volume', 'inklabels')
        return label_path

This is the Conv2d model:

import torch.nn as nn
import torch.nn.functional as F


class SegmentationModel(nn.Module):
    def __init__(self, in_channels):
        super(SegmentationModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)
        self.upconv1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.upconv2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.upconv3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.upconv4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.final_conv = nn.Conv2d(64, 1, kernel_size=1)

    def forward(self, x):
        # Assuming x is of shape [batch_size, channels, height, width]
        x1 = F.relu(self.conv1(x))
        x2 = F.relu(self.conv2(x1))
        x3 = F.relu(self.conv3(x2))
        x4 = F.relu(self.conv4(x3))
        x5 = F.relu(self.conv5(x4))
        
        # Each stride-2 ConvTranspose2d doubles the spatial size; since there is
        # no downsampling above, a 256x256 input comes out as 4096x4096 here
        x6 = F.relu(self.upconv1(x5))
        x7 = F.relu(self.upconv2(x6))
        x8 = F.relu(self.upconv3(x7))
        x9 = F.relu(self.upconv4(x8))
        
        output = self.final_conv(x9)

        return output

I really can’t understand what the issue is here…

The error seems to be raised in the loss function and based on its message I assume you are using nn.CrossEntropyLoss.
If so, note that the model output is supposed to have the shape [batch_size, nb_classes, *] containing logits and the target the shape [batch_size, *] containing class indices, where * denotes additional dimensions. If you want to pass a target containing probabilities instead, its shape would have to match the model output’s shape.

Could you thus check the shape of the model output as well as target and make sure they are as explained above?
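
For reference, a minimal sketch of these shape requirements (the class count and spatial size are just example values):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

# model output: [batch_size, nb_classes, height, width] containing logits
output = torch.randn(2, 4, 8, 8, requires_grad=True)
# target: [batch_size, height, width] containing class indices in [0, nb_classes - 1]
target = torch.randint(0, 4, (2, 8, 8))

loss = criterion(output, target)  # works: spatial dims match, target has no class dim
loss.backward()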


As you can see from the mentioned code, it uses nn.BCEWithLogitsLoss(), so what changes do I have to make now?

# Set up the device for GPU usage
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the model
model = SegmentationModel(in_channels).to(device)

# Define the loss function
criterion = nn.BCEWithLogitsLoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Define the number of epochs
epochs = 10


# Start the training loop
for epoch in range(epochs):
    print(f'Starting epoch {epoch + 1}/{epochs}')
    print('-' * 10)

    train_loss = 0.0

    # Set the model to training mode
    model.train()

    # Iterate over the training data
    for batch in train_dataloader:

        # Extract the images, masks, and labels from the batch
        images, masks, labels = batch

        # Move the images and masks to the GPU
        images = images.to(device)
        masks = masks.to(device)

        # Clear the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)

        masks = F.interpolate(masks.float().unsqueeze(1), size=outputs.shape[2:], mode='nearest')
        masks = masks.squeeze(1).long()

        # Calculate the loss
        loss = criterion(outputs, masks)

        # Backward pass
        loss.backward()

        # Update the weights
        optimizer.step()

        # Update the training loss
        train_loss += loss.item() * images.size(0)

    # Print the average loss for this epoch
    print(f'Loss: {train_loss / len(train_dataloader.dataset):.4f}')

print('Training complete.')

# Save the trained model
torch.save(model.state_dict(), 'segmentation_model.pt')

I have also tried running the code again with nn.CrossEntropyLoss, but it didn’t fix the error either.

@ptrblck hi, could you please tell me what the possible error is here?

It seems the error message might be raised from F.interpolate and you should thus check the provided size as well as the shape of the input tensor.
This topic discusses the same issue.
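
For example, if the size argument is sliced from a 3D tensor while the input to F.interpolate is 4D, you would see exactly the reported error (a minimal sketch; the shapes are assumptions based on your dataset code):

import torch
import torch.nn.functional as F

data_tensor = torch.randn(1, 256, 256)      # 3D: (C, H, W), no batch dimension
label_tensor = torch.randn(1, 1, 256, 256)  # 4D: (N, C, H, W)

# data_tensor.shape[2:] is torch.Size([256]): a single value, while the 4D
# input has two spatial dimensions -> raises the ValueError from your first post
out = F.interpolate(label_tensor, size=data_tensor.shape[2:], mode='nearest')

The size argument needs one value per spatial dimension of the input, e.g. data_tensor.shape[1:] for a 3D (C, H, W) reference tensor.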


Could you show me how?

My linked post shows an example of how to run F.interpolate. Does it not work for you, or is something else failing?

Yes, I have tried to reshape the output images and masks using F.interpolate, but it didn’t work either. Could you please check:


# Set up the device for GPU usage
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the model
model = SegmentationModel(in_channels).to(device)

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Define the number of epochs
epochs = 10

# Start the training loop
for epoch in range(epochs):
    print(f'Starting epoch {epoch + 1}/{epochs}')
    print('-' * 10)

    train_loss = 0.0

    # Set the model to training mode
    model.train()

    # Iterate over the training data
    for batch in train_dataloader:

        # Extract the images, masks, and labels from the batch
        images, masks, labels = batch

        # Move the images, masks, and labels to the GPU
        images = images.to(device)
        masks = masks.to(device)
        labels = labels.to(device)

        # Resize the input tensor to match the spatial dimensions of the target tensor
        resized_images = F.interpolate(images, size=(4096, 4096), mode='bilinear', align_corners=False)

        # Resize the mask tensor to match the spatial dimensions of the input tensor
        resized_masks = F.interpolate(masks.unsqueeze(1).float(), size=(4096, 4096), mode='nearest').squeeze(1).long()

        # Clear the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(resized_images)

        # Resize the output tensor to match the spatial dimensions of the target tensor
        resized_outputs = F.interpolate(outputs, size=(256, 256), mode='bilinear', align_corners=False)

        # Calculate the loss
        loss = criterion(resized_outputs, masks)

        # Backward pass
        loss.backward()

        # Update the weights
        optimizer.step()

        # Update the training loss
        train_loss += loss.item() * images.size(0)

    # Print the average loss for this epoch
    print(f'Loss: {train_loss / len(train_dataloader.dataset):.4f}')

print('Training complete.')

# Save the trained model
torch.save(model.state_dict(), 'segmentation_model.pt')

@ptrblck please have a look at this.

I don’t know which shapes are initially used, but the code works for me:

import torch
import torch.nn.functional as F

images = torch.randn(1, 3, 224, 224)
# Resize the input tensor to match the spatial dimensions of the target tensor
resized_images = F.interpolate(images, size=(4096, 4096), mode='bilinear', align_corners=False)

masks = torch.randint(0, 2, (1, 224, 224))
# Resize the mask tensor to match the spatial dimensions of the input tensor
resized_masks = F.interpolate(masks.unsqueeze(1).float(), size=(4096, 4096), mode='nearest').squeeze(1).long()

outputs = torch.randn(1, 10, 4096, 4096)
# Resize the output tensor to match the spatial dimensions of the target tensor
resized_outputs = F.interpolate(outputs, size=(256, 256), mode='bilinear', align_corners=False)
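
In each of these calls the input is 4D in (N, C, H, W) format and size contains two values, so the number of spatial dimensions matches. If one of your tensors has an extra (or a missing) dimension before the F.interpolate call, you would see exactly the reported mismatch.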

I would also like to ask for some patience.