ValueError: Using a target size (torch.Size([100, 2, 1])) that is different to the input size (torch.Size([100, 1])) is deprecated. Please ensure they have the same size

Hello. I am trying to solve age and gender prediction using a CNN in PyTorch, treating both age and gender as classes. I am using the UTKFace dataset. According to the dataset's file-name annotation, index [0] is the age and index [1] is the gender.
```python
import os

import numpy as np
import pandas as pd
from tqdm import tqdm

# labels: age, gender, ethnicity as per the image file-name notation
# (base_dir is assumed to point to the folder with the UTKFace images)
image_paths = []
age_labels = []
gender_labels = []

for filename in tqdm(os.listdir(base_dir)):
    image_path = os.path.join(base_dir, filename)
    temp = filename.split('_')
    age = int(temp[0])
    gender = int(temp[1])
    image_paths.append(image_path)
    age_labels.append(age)
    gender_labels.append(gender)

# convert to a dataframe
df = pd.DataFrame()
df['image'], df['age'], df['gender'] = image_paths, age_labels, gender_labels
df.head()

# reassign the raw ages to age-class labels
def class_labels_reassign(age):
    if 1 <= age <= 2:
        return 0
    elif 3 <= age <= 9:
        return 1
    elif 10 <= age <= 20:
        return 2
    elif 21 <= age <= 25:
        return 3
    elif 26 <= age <= 27:
        return 4
    elif 28 <= age <= 31:
        return 5
    elif 32 <= age <= 36:
        return 6
    elif 37 <= age <= 45:
        return 7
    elif 46 <= age <= 54:
        return 8
    elif 55 <= age <= 65:
        return 9
    else:
        return 10

# reassign the gender labels
def class_gender_labels(gender):
    if gender == 0:
        return 0
    else:
        return 1

# map the age and gender class labels
df['target_age'] = df['age'].map(class_labels_reassign)
df['target_gender'] = df['gender'].map(class_gender_labels)
df.head()

target_age = np.array(df['target_age'])
target_gender = np.array(df['target_gender'])

# create the combined labels list from the lists of ages and genders
labels = []
i = 0
while i < len(target_age):
    label = []
    label.append([target_age[i]])
    label.append([target_gender[i]])
    labels.append(label)
    i += 1

# convert the labels list to a numpy array; note the resulting shape is (N, 2, 1)
age_gender_labels = np.array(labels)
print(type(age_gender_labels))
age_gender_labels.shape
```

```python
import torch
from time import perf_counter
from sklearn.model_selection import train_test_split

# `images` is assumed to hold the preprocessed image arrays
X_train, X_test, y_train, y_test = train_test_split(
    images, age_gender_labels, test_size=0.3, random_state=42, shuffle=True)

X_test, X_for_pred, y_test, y_for_pred = train_test_split(
    X_test, y_test, test_size=0.1, random_state=42, shuffle=True)

# (the conversion of the numpy arrays to tensors such as x_train_tensor
# and y_train_label is assumed here)
train = torch.utils.data.TensorDataset(x_train_tensor, y_train_label)
trainloader = torch.utils.data.DataLoader(train, batch_size=100, shuffle=True, num_workers=1)

test = torch.utils.data.TensorDataset(x_test_tensor, y_test_label)
testloader = torch.utils.data.DataLoader(test, batch_size=100, shuffle=True, num_workers=1)

for_pred = torch.utils.data.TensorDataset(x_for_pred_label, y_for_pred_label)
for_pred_loader = torch.utils.data.DataLoader(for_pred, batch_size=100, shuffle=False)

# build the model
import torch.nn as nn
import torch.nn.functional as F

class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()

        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.relu = nn.ReLU()

        self.maxpool = nn.MaxPool2d(kernel_size=2)

        #self.Batch1 = nn.BatchNorm2d(16)
        #self.Batch2 = nn.BatchNorm2d(32)
        #self.Batch3 = nn.BatchNorm2d(64)
        #self.Batch4 = nn.BatchNorm2d(128)

        self.Drop1 = nn.Dropout(0.2)
        self.Drop2 = nn.Dropout(0.5)

        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.cnn3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.cnn4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)

        # fully connected layers (readout)
        self.fc1 = nn.Linear(128 * 1 * 1, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 1)   # age output
        #self.fc4 = nn.Linear(256, 1)  # gender output

    def forward(self, x):
        out = self.cnn1(x)
        out = self.relu(out)
        out = self.maxpool(out)
        #out = self.Batch1(out)
        out = self.Drop1(out)

        out = self.cnn2(out)
        out = self.relu(out)
        out = self.maxpool(out)
        #out = self.Batch2(out)
        out = self.Drop1(out)

        out = self.cnn3(out)
        out = self.relu(out)
        out = self.maxpool(out)
        #out = self.Batch3(out)
        out = self.Drop1(out)

        out = self.cnn4(out)
        out = self.relu(out)
        out = self.maxpool(out)
        #out = self.Batch4(out)
        out = self.Drop1(out)

        # flatten, e.g. (100, 128, 1, 1) -> (100, 128)
        out = out.view(out.size(0), -1)

        # linear layers (readout)
        out = self.fc1(out)
        out = self.Drop2(out)
        out = self.fc2(out)
        out = self.Drop2(out)
        out = self.fc3(out)
        #out = self.fc4(out)

        return out

model = CNNModel()

learning_rate = 0.001
import torch.optim as optim

# loss and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# training the model
t1_start = perf_counter()
for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        images, labels = data
        #labels = labels.unsqueeze(0)
        optimizer.zero_grad()  # zero the parameter gradients

        outputs = model(images.float())  # forward pass

        loss = criterion(outputs, labels)  # loss between outputs and targets

        loss.backward()   # gradient calculation using the backward pass

        optimizer.step()  # update the weights

        running_loss = loss.item()
        if i % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, i * len(images), len(trainloader.dataset),
                100. * i / len(trainloader), loss.item()))
```

Please help me solve this problem. I don't understand where I am going wrong.
Thank you in advance.

As indicated in the error message, the model output and target tensor shapes should be equal during the loss calculation in nn.BCELoss. Check what the dimensions and shapes of your tensors mean and make sure each logit or probability of the model output matches a target.
Also, since you are using nn.BCELoss you would have to apply a sigmoid to the output. The better approach would be to keep raw logits as the model output and use nn.BCEWithLogitsLoss instead for better numerical stability.
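
For example, a minimal sketch of the shape handling (assuming the model is changed to output two logits per sample, matching the two targets; `torch.randn` stands in for the model output here):

```python
import torch
import torch.nn as nn

criterion = nn.BCEWithLogitsLoss()

logits = torch.randn(100, 2)                 # raw model outputs, no sigmoid
targets = torch.randint(0, 2, (100, 2, 1))   # shaped like your age_gender_labels

# squeeze the trailing dim so the target matches the output: (100, 2, 1) -> (100, 2)
loss = criterion(logits, targets.squeeze(-1).float())
```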

Thank you, sir, and sorry for the late reply.

How can I improve my neural network programming skills? Your advice would be highly beneficial to me. Thank you in advance.

Generally, I would differentiate between programming skills and ML/AI background knowledge. When it comes to the programming side, I think it is always great to know some software engineering basics, such as writing flexible code or using a debugger; especially the latter has helped me a lot. It is certainly also helpful to understand how a stack trace works. Apart from that, there are great classes on e.g. Coursera that you can take. If you are new to the topic, I think it is quite calming to realize that really understanding some algorithms and the theory behind them is an ongoing (and probably never-ending) process. Hence, don't be disappointed if you are not able to understand an algorithm at first sight.
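
For instance, you can pause right before a failing line and inspect tensor shapes interactively with Python's built-in debugger (a minimal illustrative sketch that reproduces a shape mismatch like yours):

```python
import pdb

import torch
import torch.nn as nn

criterion = nn.BCELoss()
output = torch.rand(100, 1)     # stand-in for a model output
target = torch.rand(100, 2, 1)  # mismatched target, as in the original error

pdb.set_trace()  # type `output.shape` or `target.shape` at the (Pdb) prompt
loss = criterion(output, target)  # this is the line that raises the ValueError
```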

Thank you. Your guidance has been highly helpful to me.

I am still getting "ValueError: Target size (torch.Size([2, 2, 1])) must be the same as input size (torch.Size([2, 2]))". I have changed the loss function to nn.BCEWithLogitsLoss() and added a sigmoid on the output side. I also changed the batch_size to 2.
I am a newbie to neural networks. I am developing a model for age and gender detection, and I have used the map function for the age classes as well as for gender.
For example, if we want to get the age and gender for the 100th image, it shows:

```python
>>> age_gender_labels[100]
array([[2],
       [1]], dtype=int64)
```

The first entry, [2], is the age class, i.e. the age range 10-20, and [1] means the gender is female.
So, is it possible to get this type of output? And what should the out_features be in this case?

Please help me understand how to solve this error.

This is my model:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from time import perf_counter

class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16,
                               kernel_size=3, stride=1, padding=1)

        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.Drop1 = nn.Dropout(0.2)
        self.Drop2 = nn.Dropout(0.5)

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32,
                               kernel_size=3, stride=1, padding=1)

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=3, stride=1, padding=1)

        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128,
                               kernel_size=3, stride=1, padding=1)

        self.fc1 = nn.Linear(in_features=1152, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.maxpool(x)
        x = self.Drop1(x)

        x = F.relu(self.conv2(x))
        x = self.maxpool(x)
        x = self.Drop1(x)

        x = F.relu(self.conv3(x))
        x = self.maxpool(x)
        x = self.Drop1(x)

        x = F.relu(self.conv4(x))
        x = self.maxpool(x)
        x = self.Drop1(x)

        print('x_shape: ', x.shape)
        x = x.view(x.size(0), -1)

        x = F.relu(self.fc1(x))
        x = self.Drop2(x)

        x = F.relu(self.fc2(x))
        x = self.Drop2(x)

        x = torch.sigmoid(self.fc3(x))
        return x

model = CNNModel()

learning_rate = 0.001
import torch.optim as optim

# loss and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# training loop
t1_start = perf_counter()
for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        images, labels = data
        #labels = labels.unsqueeze(0)
        optimizer.zero_grad()  # zero the parameter gradients

        outputs = model(images.float())  # forward pass
        #outputs.squeeze(-1)
        #data = data.unsqueeze(1)
        loss = criterion(outputs, labels)  # loss between outputs and targets

        loss.backward()   # gradient calculation using the backward pass
        optimizer.step()  # update the weights

        running_loss = loss.item()
        if i % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, i * len(images), len(trainloader.dataset),
                100. * i / len(trainloader), loss.item()))
```

Thank you in advance

This is wrong since nn.BCEWithLogitsLoss expects logits as the model output, so remove the sigmoid activation.
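
In other words, return the raw logits from forward and only apply a sigmoid when you need probabilities at inference time. A minimal sketch (TwoLogitHead is a hypothetical stand-in for the last part of your model):

```python
import torch
import torch.nn as nn

class TwoLogitHead(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc3 = nn.Linear(256, 2)

    def forward(self, x):
        return self.fc3(x)  # raw logits: no sigmoid inside forward

model = TwoLogitHead()
criterion = nn.BCEWithLogitsLoss()

features = torch.randn(2, 256)
targets = torch.randint(0, 2, (2, 2)).float()

loss = criterion(model(features), targets)  # the sigmoid is applied internally

# apply a sigmoid only when you need probabilities at inference time
with torch.no_grad():
    probs = torch.sigmoid(model(features))
```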

I don't fully understand this approach, as it seems your use case would need to be split into:

  • a regression output for the "age" target
  • and a classification output for the "gender" target.

If so, you should use different output layers, or at least use a regression criterion (e.g. nn.MSELoss) for the "age" output; see the sketch below.

As the error message explains, the model output and target shapes should be equal. Check what each dimension represents and consider removing dim2 from the target if needed.
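
A minimal sketch of what that could look like (the layer sizes and the 48x48 input are hypothetical; the age head is trained as a regression with nn.MSELoss, the gender head as a binary classification with nn.BCEWithLogitsLoss, and the (N, 2, 1) target is split and squeezed so each head sees a matching shape):

```python
import torch
import torch.nn as nn

class TwoHeadModel(nn.Module):
    # hypothetical two-head readout on top of a shared feature extractor
    def __init__(self):
        super().__init__()
        self.backbone = nn.Sequential(nn.Flatten(), nn.Linear(48 * 48, 256), nn.ReLU())
        self.age_head = nn.Linear(256, 1)     # regression output for age
        self.gender_head = nn.Linear(256, 1)  # one logit for binary gender

    def forward(self, x):
        features = self.backbone(x)
        return self.age_head(features), self.gender_head(features)

model = TwoHeadModel()
age_criterion = nn.MSELoss()
gender_criterion = nn.BCEWithLogitsLoss()

images = torch.randn(2, 1, 48, 48)
labels = torch.randint(0, 2, (2, 2, 1))  # shaped like a batch of age_gender_labels

age_out, gender_out = model(images)
age_target = labels[:, 0].float()     # (2, 1) age target
gender_target = labels[:, 1].float()  # (2, 1) gender target

loss = age_criterion(age_out, age_target) + gender_criterion(gender_out, gender_target)
loss.backward()
```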