Problems with my neural network

I’m quite new to AI, and even newer to PyTorch.
I’m trying to create a model that guesses a person’s age from a thorax (chest) radiograph.
I’ve already come a long way fighting to create a custom dataset, and even now I’m not sure it’s working properly: I’m trying to use a transform to make my pictures (1024x1024) smaller (256x256), but when I print the image shape it’s always 1024x1024. Another problem is my channels: I’m fighting to get them down to 1, but they keep being 4.

As I said, I’m quite new to this, and I’m guessing I tried to bite off more than I can chew…
Anyway, I’ll keep trying, and I hope some of your answers can help me understand a bit more of the complex beauty that AI is.

For now, the error I’m trying to solve happens in the train function, on this line:
loss = loss_fn(outputs, labels.float())
error:
Exception has occurred: ValueError
Expected input batch_size (160) to match target batch_size (10).

I’ll post my complete code here:


# Preprocessing pipeline: reduce the image to one grayscale channel, bring it
# to 256x256, convert it to a tensor and normalize it.
# NOTE(review): CenterCrop((256, 256)) already yields a 256x256 patch, so the
# following Resize((256, 256)) is a no-op; if the intent is to downscale the
# whole picture rather than cut out its centre, the crop is probably
# unwanted -- confirm.
# NOTE(review): ToTensor expects a PIL image or ndarray -- confirm what the
# dataset actually feeds into this pipeline.
transform_image = torchvision.transforms.Compose([
    torchvision.transforms.Grayscale(num_output_channels=1),
    torchvision.transforms.CenterCrop((256, 256)),
    torchvision.transforms.Resize((256, 256)),
    torchvision.transforms.ToTensor(),
    # Grayscale leaves a single channel, so Normalize needs exactly one mean
    # and one std (the previous 4-tuples did not match the channel count).
    torchvision.transforms.Normalize((0.5,), (0.5,)),
])

#Creating the custom data set:
class CustomImageDataset(Dataset):
    """Dataset that pairs PNG images in a directory with labels read from a CSV file.

    Column 0 of the CSV supplies the image id (used to build the file name) and
    column 1 supplies the label.
    """

    # Placeholder class-level paths; ``__init__`` sets an instance-level
    # ``img_dir`` that shadows the one defined here.
    csv_file = "path_to_csv"
    img_dir = "path_to_imgs"


    def __init__(self, csv_file, img_dir, transform=transform_image, target_transform=None):
        """Read the label table and remember the image directory and transforms.

        Args:
            csv_file: Path passed to ``pd.read_csv``.
            img_dir: Directory the image file names are joined onto.
            transform: Callable meant for images; defaults to ``transform_image``.
            target_transform: Callable meant for labels, or ``None``.
        """
        self.img_labels = pd.read_csv(csv_file)
        self.img_dir = img_dir
        # NOTE(review): both transforms are only stored here; __getitem__ below
        # never calls them, so samples come back untransformed.
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
            """Return the number of rows in the label table."""
            return len(self.img_labels)
    
    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The file name is the value in column 0, converted to a string,
        left-padded with zeros to six characters and suffixed with ``.png``.
        The image is returned as ``read_image`` produced it, converted to
        float; the label is the value in column 1 wrapped in a tensor.
        """
        img_name = str(self.img_labels.iloc[idx, 0]).zfill(6)  # Add leading zeros
        img_path = os.path.join(self.img_dir, img_name)+'.png'
        image = read_image(img_path)
        label = torch.tensor(self.img_labels.iloc[idx, 1])
        # Debug output: prints the image shape every time a sample is fetched.
        print(f' aqui ta o shape da imagem{image.shape} e o size {image.size()}')
        return image.float(), label

# Build the full dataset (despite the name, this is a Dataset, not a DataLoader).
# NOTE(review): `csv_file` and `img_dir` must exist as module-level names here;
# in the code shown they are only defined as class attributes -- confirm.
train_dataloader = CustomImageDataset(csv_file,img_dir, transform_image, target_transform=None)

# Split the dataset into training and validation subsets (80% / 20%).
training_data_size = 0.8

train_size = int(training_data_size * len(train_dataloader))
# Whatever is left after the integer truncation above goes to validation.
test_size = len(train_dataloader) - train_size

training_train_dataloader, validation_dataloader = torch.utils.data.random_split(train_dataloader,[train_size,test_size])


# Wrap both subsets in DataLoaders: batches of 10, shuffling disabled.
training_train_dataloader = (DataLoader(training_train_dataloader, batch_size=10, shuffle=False))
validation_dataloader = (DataLoader(validation_dataloader, batch_size=10, shuffle=False))


# Define a convolution neural network
###
class Network(nn.Module):
    """Four 3x3 convolutions with three 2x2 max-pools, followed by one linear layer."""

    def __init__(self):
        super(Network, self).__init__()
        
        # The first layer takes 4 input channels; each later layer doubles the
        # channel count (32 -> 64 -> 128 -> 256).
        self.conv1 = nn.Conv2d(4, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        # 256 channels * 32 * 32 features in, 10 values out per sample.
        self.fc1 = nn.Linear(256*32*32, 10)

    def forward(self, input):
        """Run ``input`` through the conv/pool stack and the linear layer.

        ``pool`` is applied three times (after conv2, conv3 and conv4), so the
        spatial size shrinks by a factor of 8; the hard-coded 32x32 used below
        therefore corresponds to 256x256 inputs.
        """
        output = F.relu(self.conv1(input))      
        output = self.pool(F.relu(self.conv2(output)))
        output = self.pool(F.relu(self.conv3(output)))
        output = self.pool(F.relu(self.conv4(output)))
        # NOTE(review): with -1 in the first position, a sample that holds more
        # than 256*32*32 features has the surplus folded into the batch
        # dimension, which changes the batch size seen by the loss.
        output = output.view(-1, 256 * 32 * 32)
        output = self.fc1(output)

        # squeeze() drops every size-1 dimension of the result.
        return output.squeeze()


# Instantiate a neural network model
model = Network()

# Define the loss function (cross-entropy, a classification loss) and an Adam optimizer.
# NOTE(review): the stated goal is predicting an age and the training loop
# passes float labels; confirm a classification loss is intended rather than a
# regression loss such as nn.MSELoss.
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

from torch.autograd import Variable

# Function to save the model
def saveModel():
    """Write the global ``model``'s ``state_dict`` to ``./myFirstModel.pth``."""
    torch.save(model.state_dict(), "./myFirstModel.pth")




# Training function. We simply have to loop over our data iterator and feed the inputs to the network and optimize.
def train(num_epochs):
    """Train the global ``model`` for ``num_epochs`` epochs.

    Only the first ``fraction`` of the batches in ``training_train_dataloader``
    is used in each epoch (but always at least one batch). After every epoch
    ``testAccuracy()`` is called, and ``saveModel()`` is invoked whenever the
    returned value beats the best one seen so far.

    Args:
        num_epochs: Number of passes over the (truncated) training loader.
    """
    fraction = 0.01  # 1%
    # Clamp to one batch: with fewer than 100 batches int() would yield 0 and
    # the inner loop would silently train on nothing.
    num_iterations = max(1, int(len(training_train_dataloader) * fraction))

    best_accuracy = 0.0

    # Define your execution device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("The model will be running on", device, "device")

    # Move model parameters and buffers to the chosen device
    model.to(device)

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0

        for i, (images, labels) in enumerate(training_train_dataloader, 0):
            if i >= num_iterations:
                break

            # Move the batch to the device. Plain tensors take part in autograd
            # directly, so the deprecated Variable wrapper is not needed.
            images = images.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()
            # run the model on the images from the training set
            outputs = model(images)
            # debug output: shapes going into the loss
            print(f"Input shape: {images.shape}")
            print(f"Target shape: {labels.shape}")

            # compute the loss based on model output and real labels
            loss = loss_fn(outputs, labels.float())
            # backpropagate the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()

            # accumulate the loss and report its mean every 1000 batches
            running_loss += loss.item()     # extract the loss value
            if i % 1000 == 999:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                # zero the loss
                running_loss = 0.0

        # Evaluate after each epoch; testAccuracy() presumably returns a
        # percentage -- confirm against its definition.
        accuracy = testAccuracy()
        print('For epoch', epoch+1,'the test accuracy over the whole test set is %d %%' % (accuracy))

        # we want to save the model if the accuracy is the best
        if accuracy > best_accuracy:
            saveModel()
            best_accuracy = accuracy

Based on your code snippet, it seems you are initializing the transformation and passing it to the custom Dataset, but are never using it in the __getitem__ method to actually apply the transformation to each sample.
Something like this is the common approach:

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair for row ``idx`` with the transforms applied."""
        # Column 0 holds the image id; pad it with leading zeros to six characters.
        file_stem = str(self.img_labels.iloc[idx, 0]).zfill(6)
        image = read_image(os.path.join(self.img_dir, file_stem) + '.png')
        if self.transform:
            image = self.transform(image)

        # Column 1 holds the label.
        label = torch.tensor(self.img_labels.iloc[idx, 1])
        if self.target_transform:
            label = self.target_transform(label)

        print(f' aqui ta o shape da imagem{image.shape} e o size {image.size()}')
        return image.float(), label

This error is usually raised if you are using an invalid reshape of the data and thus changing the batch size.
This line of code is most likely causing it:

output = output.view(-1, 256 * 32 * 32)

since you are moving the “rest” into the batch dimension.
Replace it with output = output.view(output.size(0), -1), which keeps the batch dimension intact, and fix any shape mismatches in later layers if these are raised.

Oh, I didn’t know we had to call it again, thanks!

But now I get this error in this part:

if self.transform:
      image = self.transform(image)

Exception has occurred: TypeError
Input image tensor permitted channel values are [1, 3], but found 4

I can’t seem to bring it down to only one channel (as I’m using grayscale images).

That’s unexpected, as the Grayscale transformation works for me using a 4-channel input image:

# Build a random 4-channel PIL image and convert it to single-channel grayscale.
to_pil = torchvision.transforms.ToPILImage()
gray = torchvision.transforms.Grayscale(num_output_channels=1)

img = to_pil(torch.randn(4, 224, 224))
print(np.array(img).shape)
# (224, 224, 4)

out = gray(img)
print(np.array(out).shape)
# (224, 224)

Which torchvision version are you using?