The size of tensor a (64) must match the size of tensor b (14) at non-singleton dimension 1

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from sklearn.metrics import f1_score
from torch.utils.data import Dataset
from torchvision import transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class XrayDataset(Dataset):
    def __init__(self, data_frame, transform=None):
        self.data_frame = data_frame
        self.transform = transform  # note: currently unused; a fixed transform is built in __getitem__

    def __getitem__(self, idx):
        row = self.data_frame.iloc[idx]
        address = row['path']
        data = Image.open(address).convert('RGB')
        label = np.array(row['disease_vec'], dtype=np.float64)

        transform = transforms.Compose([
            transforms.Resize(256),
            # transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        return transform(data), torch.FloatTensor(label)

    def __len__(self):
        return len(self.data_frame)


# Creating the Dataset and loader
test_dataset = XrayDataset(test_df)
train_dataset = XrayDataset(train_df)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=64,
    num_workers=0,
    shuffle=True,
)

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    num_workers=0,
    shuffle=True,
)
# end of Dataset / DataLoader setup #
np.random.seed(42)
torch.manual_seed(42)


class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        # Image size 256 * 256 * 3 input channels
        # 1. convolutional layer
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.conv1_bn = nn.BatchNorm2d(32)
        # outputs - (32 filter images, 254 * 254)
        # 2. convolutional layer
        # sees a 254 * 254 * 32 tensor; a 2x2 MaxPooling layer follows in forward()
        self.conv2 = nn.Conv2d(32, 32, 3)
        self.conv2_bn = nn.BatchNorm2d(32)
        # outputs 126 * 126 * 32 filtered images (252 * 252 after the conv, halved by pooling)
        # 3. convolutional layer
        # sees 126 x 126 x 32 tensor (2x2 MaxPooling layer beforehand)
        self.conv3 = nn.Conv2d(32, 64, 3)
        self.conv3_bn = nn.BatchNorm2d(64)
        # outputs 124 * 124 * 64 filtered images, kernel-size is 3
        # 4 Convolution Layer
        # 124 * 124 * 64 Image
        self.conv4 = nn.Conv2d(64, 64, 3)
        self.conv4_bn = nn.BatchNorm2d(64)
        # output tensor 61 * 61 * 64
        # 5. convolutional layer
        self.conv5 = nn.Conv2d(64, 128, 3)
        self.conv5_bn = nn.BatchNorm2d(128)
        # outputs 59 * 59 * 128 filter images
        # 6 convolutional layer
        self.conv6 = nn.Conv2d(128, 128, 3)
        self.conv6_bn = nn.BatchNorm2d(128)
        # outputs 28 * 28 * 128 filtered Images

        # Definition of the MaxPooling layer
        self.pool = nn.MaxPool2d(2, 2)

        # 1. fully-connected layer
        # Input is a flattened 128*28*28 dimensional vector
        # Output is a 128 dimensional vector
        self.fc1 = nn.Linear(128 * 28 * 28, 128)
        self.fc1_bn = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 14)
        # self.fc3 = nn.Linear(14, 10)

        # definition of the dropout layers (probabilities 20% to 50%)
        self.dropout20 = nn.Dropout(0.2)
        self.dropout30 = nn.Dropout(0.3)
        self.dropout40 = nn.Dropout(0.4)
        self.dropout50 = nn.Dropout(0.5)

    def forward(self, x):
        x = self.conv1_bn(F.relu(self.conv1(x)))
        print(x.shape, "-- after 1st convolution layer --")
        x = self.pool(self.conv2_bn(F.relu(self.conv2(x))))
        print(x.shape, " --- after 2nd convolution layer --")
        x = self.dropout20(x)
        x = self.conv3_bn(F.relu(self.conv3(x)))
        print(x.shape, " -- after 3rd convolution layer --")
        x = self.pool(self.conv4_bn(F.relu(self.conv4(x))))
        print(x.shape, "-- after 4th convolution  layer -- ")
        x = self.dropout30(x)
        x = self.conv5_bn(F.relu(self.conv5(x)))
        print(x.shape, " -- after 5th convolution  layer--")
        x = self.pool(self.conv6_bn(F.relu(self.conv6(x))))
        print(x.shape, "-- after 6th convolution layer --")
        x = self.dropout40(x)
        # flatten output of the last (6th) convolutional layer into a vector
        # this vector is passed through the fully-connected layers
        x = x.view(-1, 128 * 28 * 28)
        print(x.shape, "-- after the view call -- ")
        # add dropout layer
        # add 1st hidden layer, with relu activation function
        x = F.relu(self.fc1(x))
        print(x.shape, "After full connect layer 1")
        # add dropout layer
        x = self.dropout50(x)
        # add 2nd hidden layer, without relu activation function
        x = self.fc2(x)
        # x = F.relu(self.fc2(x))
        print(x.shape, "After full connected layer 2")
        return x


model = ConvNet().to(device)

# Hyper Parameters
num_epochs = 10
weight_decay = 5e-4
learning_rate = 0.001

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)


def train(epoch):
    model.train()
    running_loss = 0.0
    train_total, train_correct = 0.0, 0.0
    y_train, y_pred = [], []
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        print("--- Shape of Image before hitting the loss function, ", outputs.size())
        loss = criterion(outputs, labels)
        print("--- Image Data type-- ", outputs.size())
        print("--- Labels Data type--", labels.size())
        # loss = criterion(outputs, torch.max(labels, 1)[1])

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(outputs.shape)
        print(labels.shape)

        running_loss += loss.item()
        _, train_predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        print("Training total ---", train_total)
        print("Training predicted", train_predicted)
        train_correct += (train_predicted == labels.long()).sum().item()
        y_train += labels.tolist()
        y_pred += train_predicted.tolist()

        if i % 2000 == 0:
            print('Epoch: {} [{}/{} ({:.0f}%)]\tloss: {:.6f}'.format(
                epoch, i * len(images), len(train_loader.dataset),
                       100. * i / len(train_loader), loss.item()))

    macro_f1 = f1_score(y_train, y_pred, average='macro')
    print("epoch (%d): Train accuracy: %.4f, f1_score: %.4f, loss: %.3f" % (
        epoch, train_correct / train_total, macro_f1, running_loss / train_total))


# Train the model
for epoch in range(1, num_epochs + 1):
    train(epoch)

Output from the run:

Scans found: 112120 , Total Headers 112120
torch.Size([64, 32, 254, 254]) -- after 1st convolution layer --
torch.Size([64, 32, 126, 126])  --- after 2nd convolution layer --
torch.Size([64, 64, 124, 124])  -- after 3rd convolution layer --
torch.Size([64, 64, 61, 61]) -- after 4th convolution  layer --
torch.Size([64, 128, 59, 59])  -- after 5th convolution  layer--
torch.Size([64, 128, 28, 28]) -- after 6th convolution layer --
torch.Size([64, 100352]) -- after the view call --
torch.Size([64, 128]) After full connect layer 1
torch.Size([64, 14]) After full connected layer 2
--- Shape of Image before hitting the loss function,  torch.Size([64, 14])
--- Image Data type--  torch.Size([64, 14])
--- Labels Data type-- torch.Size([64, 14])
torch.Size([64, 14])
torch.Size([64, 14])
Training total --- 64.0
Training predicted tensor([ 4, 5, 1, 3, 1, 11, 5, 12, 3, 12, 12, 12, 12, 5, 12, 0, 3, 11,
2, 12, 3, 12, 1, 11, 8, 7, 12, 1, 12, 1, 8, 5, 12, 5, 12, 11,
12, 11, 9, 12, 8, 3, 5, 2, 10, 11, 1, 1, 2, 5, 1, 5, 1, 1,
12, 11, 8, 12, 3, 3, 11, 12, 12, 1])

I'm getting this error on my neural network and am not sure how to fix it: "The size of tensor a (64) must match the size of tensor b (14) at non-singleton dimension 1"

This line of code:

x = x.view(-1, 128 * 28 * 28)

is often wrong, as it can silently change the batch size, which might also be the case here.
Use x = x.view(x.size(0), -1) instead: this keeps the batch dimension intact and flattens only the feature dimensions, which avoids potential shape-mismatch errors in the next linear layer.
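To illustrate the difference, here is a minimal sketch (the [64, 128, 28, 28] activation matches your printed logs; the 56x56 activation is a made-up example):

import torch

x = torch.randn(64, 128, 28, 28)       # activation shape from your logs
flat = x.view(x.size(0), -1)           # keeps the batch dim: torch.Size([64, 100352])

# With a hard-coded feature count, a different activation size that happens to
# divide evenly is folded into the batch dimension instead of raising an error:
y = torch.randn(64, 128, 56, 56)       # hypothetical larger activation
bad = y.view(-1, 128 * 28 * 28)        # silently becomes torch.Size([256, 100352])
print(flat.shape, bad.shape)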

I made the correction and am still running into the same error message. Also, which linear layer are you referring to when you say it may need changing? I set the output to 14 because it errors if I switch it to 10.

Not sure what you mean about the next linear layer -- can you elaborate on that?

This is the error message I get if I change the output size from 14 to 10:

"RuntimeError: The size of tensor a (10) must match the size of tensor b (14) at non-singleton dimension 1"

After the flattening operation, the next layer is self.fc1, which is what I'm referring to.
In case you get stuck, could you post a minimal, executable code snippet that reproduces the issue?
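For reference, a small sketch of what "the next linear layer" means (the conv stack below is an illustrative stand-in, not your full model): fc1's in_features has to equal the flattened activation size, and can be inferred from a dummy forward pass instead of being hard-coded, while the last layer's out_features has to match the label dimension, which is why 14 works and 10 fails against your [64, 14] labels:

import torch
import torch.nn as nn

conv_stack = nn.Sequential(        # stand-in for the conv/pool layers above
    nn.Conv2d(3, 32, 3), nn.ReLU(),
    nn.MaxPool2d(2, 2),
)

with torch.no_grad():
    dummy = torch.zeros(1, 3, 256, 256)               # one fake input image
    n_features = conv_stack(dummy).flatten(1).size(1)

fc1 = nn.Linear(n_features, 128)  # in_features == flattened activation size
fc2 = nn.Linear(128, 14)          # out_features == label dimension (14 here)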

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        # Image size 256 * 256 * 3 input channels
        # 1. convolutional layer
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.conv1_bn = nn.BatchNorm2d(32)
        # outputs - (32 filter images, 254 * 254)
        # 2. convolutional layer
        # sees a 254 * 254 * 32 tensor; a 2x2 MaxPooling layer follows in forward()
        self.conv2 = nn.Conv2d(32, 32, 3)
        self.conv2_bn = nn.BatchNorm2d(32)
        # outputs 126 * 126 * 32 filtered images (252 * 252 after the conv, halved by pooling)
        # 3. convolutional layer
        # sees 126 x 126 x 32 tensor (2x2 MaxPooling layer beforehand)
        self.conv3 = nn.Conv2d(32, 64, 3)
        self.conv3_bn = nn.BatchNorm2d(64)
        # outputs 124 * 124 * 64 filtered images, kernel-size is 3
        # 4 Convolution Layer
        # 124 * 124 * 64 Image
        self.conv4 = nn.Conv2d(64, 64, 3)
        self.conv4_bn = nn.BatchNorm2d(64)
        # output tensor 61 * 61 * 64
        # 5. convolutional layer
        self.conv5 = nn.Conv2d(64, 128, 3)
        self.conv5_bn = nn.BatchNorm2d(128)
        # outputs 59 * 59 * 128 filter images
        # 6 convolutional layer
        self.conv6 = nn.Conv2d(128, 128, 3)
        self.conv6_bn = nn.BatchNorm2d(128)
        # outputs 28 * 28 * 128 filtered Images

        # Definition of the MaxPooling layer
        self.pool = nn.MaxPool2d(2, 2)

        # 1. fully-connected layer
        # Input is a flattened 128*28*28 dimensional vector
        # Output is a 128 dimensional vector
        self.fc1 = nn.Linear(128 * 28 * 28, 128)
        self.fc1_bn = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 14)
        # self.fc3 = nn.Linear(14, 10)

        # definition of the dropout layers (probabilities 20% to 50%)
        self.dropout20 = nn.Dropout(0.2)
        self.dropout30 = nn.Dropout(0.3)
        self.dropout40 = nn.Dropout(0.4)
        self.dropout50 = nn.Dropout(0.5)

    def forward(self, x):
        x = self.conv1_bn(F.relu(self.conv1(x)))
        print(x.shape, "-- after 1st convolution layer --")
        x = self.pool(self.conv2_bn(F.relu(self.conv2(x))))
        print(x.shape, " --- after 2nd convolution layer --")
        x = self.dropout20(x)
        x = self.conv3_bn(F.relu(self.conv3(x)))
        print(x.shape, " -- after 3rd convolution layer --")
        x = self.pool(self.conv4_bn(F.relu(self.conv4(x))))
        print(x.shape, "-- after 4th convolution  layer -- ")
        x = self.dropout30(x)
        x = self.conv5_bn(F.relu(self.conv5(x)))
        print(x.shape, " -- after 5th convolution  layer--")
        x = self.pool(self.conv6_bn(F.relu(self.conv6(x))))
        print(x.shape, "-- after 6th convolution layer --")
        x = self.dropout40(x)
        # flatten output of the last (6th) convolutional layer into a vector
        # this vector is passed through the fully-connected layers
        # x = x.view(-1, 128 * 28 * 28)
        x = x.view(x.size(0), -1)
        print(x.shape, "-- after the view call -- ")
        # add dropout layer
        # add 1st hidden layer, with relu activation function
        x = F.relu(self.fc1(x))
        print(x.shape, "After 1st full connect layer")
        # add dropout layer
        x = self.dropout50(x)
        # add 2nd hidden layer, without relu activation function
        x = self.fc2(x)
        # x = F.relu(self.fc2(x))
        print(x.shape, "After 2nd full connected layer")
        return x


model = ConvNet().to(device)

# Hyper Parameters
num_epochs = 10
weight_decay = 5e-4
learning_rate = 0.001

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)


def train(epoch):
    model.train()
    running_loss = 0.0
    train_total, train_correct = 0.0, 0.0
    y_train, y_pred = [], []
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        print("--- Shape of Image before hitting the loss function, ", outputs.size())
        loss = criterion(outputs, labels)
        print("--- Image Data type-- ", outputs.size())
        print("--- Labels Data type--", labels.size())
        # loss = criterion(outputs, torch.max(labels, 1)[1])

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(outputs.shape)
        print(labels.shape)

        running_loss += loss.item()
        print("Running loss - ", running_loss)
        _, train_predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        print('line 255', train_predicted.shape)
        print('another line', labels.shape)
        print('line 256', labels.long().shape)
        print(train_predicted == labels.long())

        # print("Training total ---", train_total)
        # print("Training predicted", train_predicted)
        train_correct += (train_predicted == labels.long()).sum().item()
        y_train += labels.tolist()
        y_pred += train_predicted.tolist()

        if i % 2000 == 0:
            print('Epoch: {} [{}/{} ({:.0f}%)]\tloss: {:.6f}'.format(
                epoch, i * len(images), len(train_loader.dataset),
                       100. * i / len(train_loader), loss.item()))

    macro_f1 = f1_score(y_train, y_pred, average='macro')
    print("epoch (%d): Train accuracy: %.4f, f1_score: %.4f, loss: %.3f" % (
        epoch, train_correct / train_total, macro_f1, running_loss / train_total))


# Train the model
for epoch in range(1, num_epochs + 1):
    train(epoch)



Here's the shape information from the run:

torch.Size([64, 100352]) -- after the view call --
torch.Size([64, 128]) After 1st full connect layer

Error Message - RuntimeError: The size of tensor a (64) must match the size of tensor b (14) at non-singleton dimension 1


I'm still very stuck on this problem right now. I looked back at the model calculations.

Hey, were you able to find the error? I'm using PyCharm, so I'm not sure how to post the code snippet.

Your code was unfortunately not executable and needed some fixes, so it also took more time to debug. You would usually get a faster response if your code snippet were directly executable.

In any case, the code works for me and I cannot reproduce the shape mismatch:

import torch
import torch.nn as nn

model = ConvNet()  # using the ConvNet definition posted above
criterion = nn.MSELoss()

images = torch.randn(16, 3, 256, 256)
labels = torch.randn(16, 14)

outputs = model(images)
loss = criterion(outputs, labels)
loss.backward()

No changes were made to the model.
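One observation, based purely on the shapes printed in your logs (train_predicted: [64], labels: [64, 14]): the comparison in the accuracy computation, rather than the model itself, can raise exactly this error, since a [64] tensor cannot be broadcast against a [64, 14] tensor. A minimal sketch assuming those shapes:

import torch

outputs = torch.randn(64, 14)               # model output shape from the logs
labels = torch.rand(64, 14)                 # multi-hot float labels from the logs

_, train_predicted = torch.max(outputs, 1)  # shape [64]

# Broadcasting [64] against [64, 14] fails with:
# RuntimeError: The size of tensor a (64) must match the size of tensor b (14)
# at non-singleton dimension 1
# (train_predicted == labels.long())        # uncommenting this reproduces it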