TypeError: forward() missing 1 required positional argument: 'c'

I created this simplified version of VGG16:

class VGG16COMBO(nn.Module):
    
    def __init__(self, num_classes):
        super(VGG16COMBO, self).__init__()

        # calculate same padding:
        # (w - k + 2*p)/s + 1 = o
        # => p = (s(o-1) - w + k)/2

        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=1,
                      out_channels=64,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      # (1(32-1)- 32 + 3)/2 = 1
                      padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64,
                      out_channels=64,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )

        self.block_2 = nn.Sequential(
            nn.Conv2d(in_channels=64,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(in_channels=128,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )
        
        self.block_3 = nn.Sequential(
            nn.Conv2d(in_channels=128,
                      out_channels=256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256,
                      out_channels=256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256,
                      out_channels=256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        )

        self.block_4 = nn.Sequential(
            nn.Conv2d(in_channels=256,
                      out_channels=512,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512,
                      out_channels=512,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512,
                      out_channels=512,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2))
        ) 


        self.classifier = nn.Sequential(
            nn.Linear(2048, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.25),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.25),
            nn.Linear(4096, num_classes),
        )

    def forward(self, m, c):

        m = self.block_1(m)
        m = self.block_2(m)
        m = self.block_3(m)
        m = self.block_4(m)
        m = m.view(m.size(0), -1)
        m = self.classifier(m)

        c = self.block_1(c)
        c = self.block_2(c)
        c = self.block_3(c)
        c = self.block_4(c)
        c = c.view(c.size(0), -1)
        c = self.classifier(c)

        x = torch.cat((m, c), dim=1)
        return x

As you can see, in the forward I pass 2 arguments, m and c, that are referred to data of Mnist and Cifar10.
Then I create the model:

modelcombo = VGG16COMBO(1).cuda()
print(modelcombo)

# Define an optimizier
import torch.optim as optim
optimizer = optim.SGD(modelcombo.parameters(), lr = 0.01)
# Define a loss 
criterion = nn.BCEWithLogitsLoss()

The problem is in the training function:

def train(net, loaders, optimizer, criterion, epochs=20, dev=dev, save_param = False, model_name="valerio"):
    try:
        net = net.to(dev)
        #print(net)
        # Initialize history
        history_loss = {"train": [], "val": [], "test": []}
        history_accuracy = {"train": [], "val": [], "test": []}
        # Store the best val accuracy
        best_val_accuracy = 0

        # Process each epoch
        for epoch in range(epochs):
            # Initialize epoch variables
            sum_loss = {"train": 0, "val": 0, "test": 0}
            sum_accuracy = {"train": 0, "val": 0, "test": 0}
            # Process each split
            for split in ["train", "val", "test"]:
                if split == "train":
                  net.train()
                else:
                  net.eval()
                # Process each batch
                for (input, labels) in loaders[split]:
                    # Move to CUDA
                    input = input.to(dev)
                    labels = labels.to(dev)
                    # Reset gradients
                    optimizer.zero_grad()
                    # Compute output
                    pred = net(input)
                    #pred = pred.squeeze(dim=1) # Output shape is [Batch size, 1], but we want [Batch size]
                    labels = labels.unsqueeze(1)
                    labels = labels.float()
                    loss = criterion(pred, labels)
                    # Update loss
                    sum_loss[split] += loss.item()
                    # Check parameter update
                    if split == "train":
                        # Compute gradients
                        loss.backward()
                        # Optimize
                        optimizer.step()
                    # Compute accuracy
                    #pred_labels = pred.argmax(1) + 1
                    pred_labels = (pred >= 0.5).long() # Binarize predictions to 0 and 1
                    batch_accuracy = (pred_labels == labels).sum().item()/input.size(0)
                    # Update accuracy
                    sum_accuracy[split] += batch_accuracy
            # Compute epoch loss/accuracy
            epoch_loss = {split: sum_loss[split]/len(loaders[split]) for split in ["train", "val", "test"]}
            epoch_accuracy = {split: sum_accuracy[split]/len(loaders[split]) for split in ["train", "val", "test"]}

            # Store params at the best validation accuracy
            if save_param and epoch_accuracy["val"] > best_val_accuracy:
              #torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
              torch.save(net.state_dict(), f"{model_name}_best_val.pth")
              best_val_accuracy = epoch_accuracy["val"]

            # Update history
            for split in ["train", "val", "test"]:
                history_loss[split].append(epoch_loss[split])
                history_accuracy[split].append(epoch_accuracy[split])
            # Print info
            print(f"Epoch {epoch+1}:",
                  f"TrL={epoch_loss['train']:.4f},",
                  f"TrA={epoch_accuracy['train']:.4f},",
                  f"VL={epoch_loss['val']:.4f},",
                  f"VA={epoch_accuracy['val']:.4f},",
                  f"TeL={epoch_loss['test']:.4f},",
                  f"TeA={epoch_accuracy['test']:.4f},")
    except KeyboardInterrupt:
        print("Interrupted")
    finally:
        # Plot loss
        plt.title("Loss")
        for split in ["train", "val", "test"]:
            plt.plot(history_loss[split], label=split)
        plt.legend()
        plt.show()
        # Plot accuracy
        plt.title("Accuracy")
        for split in ["train", "val", "test"]:
            plt.plot(history_accuracy[split], label=split)
        plt.legend()
        plt.show()

Because when I call the function the problem is that I am passing only one dataloader, the one of mnist, and not for cifar.

# Train model
train(modelcombo, loaders, optimizer, criterion, epochs=10, dev=dev) #loaders is only for mnist
#I want to pass also loaders_cifar

TypeError: forward() missing 1 required positional argument: 'c'

Now, I have to modify the training function, the forward function, or I have to combine the loaders in such a way?

Hi,

your def forward(self, m, c): expects an argument ‘c’ (as the error tells as well).
I don’t know, what your loaders contains, but based on your model, you do also have to pass the CIFAR data.

Regards,
Unity05

These are the loaders of MNIST

# Define loaders
from torch.utils.data import DataLoader
train_loader = DataLoader(train_set, batch_size=64, num_workers=2, shuffle=True, drop_last=True)
val_loader   = DataLoader(val_set,   batch_size=64, num_workers=2, shuffle=False, drop_last=False)
test_loader  = DataLoader(test_set,  batch_size=64, num_workers=2, shuffle=False, drop_last=False)

# Define dictionary of loaders
loaders = {"train": train_loader,
           "val": val_loader,
           "test": test_loader}

And these of CIFAR

# Define loaders
from torch.utils.data import DataLoader
train_loader_cifar = DataLoader(train_set_cifar, batch_size=64, num_workers=2, shuffle=True, drop_last=True)
val_loader_cifar   = DataLoader(val_set_cifar,   batch_size=64, num_workers=2, shuffle=False, drop_last=False)
test_loader_cifar  = DataLoader(test_set_cifar,  batch_size=64, num_workers=2, shuffle=False, drop_last=False)

# Define dictionary of loaders
loaders_cifar = {"train": train_loader_cifar,
           "val": val_loader_cifar,
           "test": test_loader_cifar}

Then instead of for (input, labels) in loaders[split]: you might want to do something like for (input_m, labels_m), (input_c, labels_c) in zip(loaders[split], loaders_cifar[split]): and therefore also instead of pred = net(input) something like pred = net(input_m, input_c).

Regards,
Unity05

I did in this way:

def train(net, loaders, optimizer, criterion, epochs=10, dev=dev, save_param = False, model_name="valerio"):
    loaders_mnist, loaders_cifar = loaders
    try:
        net = net.to(dev)
        #print(net)
        # Initialize history
        history_loss = {"train": [], "val": [], "test": []}
        history_accuracy = {"train": [], "val": [], "test": []}
        # Store the best val accuracy
        best_val_accuracy = 0

        # Process each epoch
        for epoch in range(epochs):
            # Initialize epoch variables
            sum_loss = {"train": 0, "val": 0, "test": 0}
            sum_accuracy = {"train": 0, "val": 0, "test": 0}
            # Process each split
            for split in ["train", "val", "test"]:
                if split == "train":
                  net.train()
                else:
                  net.eval()
                # Process each batch
                for ((input_mnist, labels_mnist), (input_cifar, labels_cifar)) in zip(loaders_mnist[split], loaders_cifar[split]):
                #for (input, labels) in loaders[split]:
                    # Move to CUDA
                    input_mnist = input_mnist.to(dev)
                    labels_mnist = labels_mnist.to(dev)
                    input_cifar = input_cifar.to(dev)
                    labels_cifar = labels_cifar.to(dev)
                    # Reset gradients
                    optimizer.zero_grad()
                    # Compute output
                    pred = net(input_mnist, input_cifar)
                    #pred = pred.squeeze(dim=1) # Output shape is [Batch size, 1], but we want [Batch size]
                    labels = labels.unsqueeze(1)
                    labels = labels.float()
                    loss = criterion(pred, labels)
                    # Update loss
                    sum_loss[split] += loss.item()
                    # Check parameter update
                    if split == "train":
                        # Compute gradients
                        loss.backward()
                        # Optimize
                        optimizer.step()
                    # Compute accuracy
                    #pred_labels = pred.argmax(1) + 1
                    pred_labels = (pred >= 0.5).long() # Binarize predictions to 0 and 1
                    batch_accuracy = (pred_labels == labels).sum().item()/input.size(0)
                    # Update accuracy
                    sum_accuracy[split] += batch_accuracy
            # Compute epoch loss/accuracy
            epoch_loss = {split: sum_loss[split]/len(loaders[split]) for split in ["train", "val", "test"]}
            epoch_accuracy = {split: sum_accuracy[split]/len(loaders[split]) for split in ["train", "val", "test"]}

            # Store params at the best validation accuracy
            if save_param and epoch_accuracy["val"] > best_val_accuracy:
              #torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
              torch.save(net.state_dict(), f"{model_name}_best_val.pth")
              best_val_accuracy = epoch_accuracy["val"]

            # Update history
            for split in ["train", "val", "test"]:
                history_loss[split].append(epoch_loss[split])
                history_accuracy[split].append(epoch_accuracy[split])
            # Print info
            print(f"Epoch {epoch+1}:",
                  f"TrL={epoch_loss['train']:.4f},",
                  f"TrA={epoch_accuracy['train']:.4f},",
                  f"VL={epoch_loss['val']:.4f},",
                  f"VA={epoch_accuracy['val']:.4f},",
                  f"TeL={epoch_loss['test']:.4f},",
                  f"TeA={epoch_accuracy['test']:.4f},")
    except KeyboardInterrupt:
        print("Interrupted")
    finally:
        # Plot loss
        plt.title("Loss")
        for split in ["train", "val", "test"]:
            plt.plot(history_loss[split], label=split)
        plt.legend()
        plt.show()
        # Plot accuracy
        plt.title("Accuracy")
        for split in ["train", "val", "test"]:
            plt.plot(history_accuracy[split], label=split)
        plt.legend()
        plt.show()
# Train model
train(modelcombo, (loaders, loaders_cifar), optimizer, criterion, epochs=3, dev=dev)

I have this error:

---------------------------------------------------------------------------

UnboundLocalError                         Traceback (most recent call last)

<ipython-input-120-991dbce10c53> in <module>()
      1 # Train model
----> 2 train(modelcombo, (loaders, loaders_cifar), optimizer, criterion, epochs=3, dev=dev)

<ipython-input-119-8f7766b35a35> in train(net, loaders, optimizer, criterion, epochs, dev, save_param, model_name)
     34                     pred = net(input_mnist, input_cifar)
     35                     #pred = pred.squeeze(dim=1) # Output shape is [Batch size, 1], but we want [Batch size]
---> 36                     labels = labels.unsqueeze(1)
     37                     labels = labels.float()
     38                     loss = criterion(pred, labels)

UnboundLocalError: local variable 'labels' referenced before assignment

Obviously I tried to fix, with the above code, but I failed:

def train2(net, loaders, optimizer, criterion, epochs=10, dev=dev, save_param = False, model_name="valerio"):
    loaders_mnist, loaders_cifar = loaders
    try:
        net = net.to(dev)
        #print(net)
        # Initialize history
        history_loss = {"train": [], "val": [], "test": []}
        history_accuracy = {"train": [], "val": [], "test": []}
        # Store the best val accuracy
        best_val_accuracy = 0

        # Process each epoch
        for epoch in range(epochs):
            # Initialize epoch variables
            sum_loss = {"train": 0, "val": 0, "test": 0}
            sum_accuracy = {"train": 0, "val": 0, "test": 0}
            # Process each split
            for split in ["train", "val", "test"]:
                if split == "train":
                  net.train()
                else:
                  net.eval()
                # Process each batch
                for ((input_mnist, labels_mnist), (input_cifar, labels_cifar)) in zip(loaders_mnist[split], loaders_cifar[split]):
                #for (input, labels) in loaders[split]:
                    # Move to CUDA
                    input_mnist = input_mnist.to(dev)
                    labels_mnist = labels_mnist.to(dev)
                    input_cifar = input_cifar.to(dev)
                    labels_cifar = labels_cifar.to(dev)
                    # Reset gradients
                    optimizer.zero_grad()
                    # Compute output
                    pred = net(input_mnist, input_cifar)
                    #pred = pred.squeeze(dim=1) # Output shape is [Batch size, 1], but we want [Batch size]
                    labels_mnist = labels_mnist.unsqueeze(1)
                    labels_mnist = labels_mnist.float()
                    labels_cifar = labels_cifar.unsqueeze(1)
                    labels_cifar = labels_cifar.float()
                    labels = labels_mnist, labels_cifar
                    loss = criterion(pred, labels)
                    # Update loss
                    sum_loss[split] += loss.item()
                    # Check parameter update
                    if split == "train":
                        # Compute gradients
                        loss.backward()
                        # Optimize
                        optimizer.step()
                    # Compute accuracy
                    #pred_labels = pred.argmax(1) + 1
                    pred_labels = (pred >= 0.5).long() # Binarize predictions to 0 and 1
                    batch_accuracy = (pred_labels == labels).sum().item()/input.size(0)
                    # Update accuracy
                    sum_accuracy[split] += batch_accuracy
            # Compute epoch loss/accuracy
            epoch_loss = {split: sum_loss[split]/len(loaders[split]) for split in ["train", "val", "test"]}
            epoch_accuracy = {split: sum_accuracy[split]/len(loaders[split]) for split in ["train", "val", "test"]}

            # Store params at the best validation accuracy
            if save_param and epoch_accuracy["val"] > best_val_accuracy:
              #torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
              torch.save(net.state_dict(), f"{model_name}_best_val.pth")
              best_val_accuracy = epoch_accuracy["val"]

            # Update history
            for split in ["train", "val", "test"]:
                history_loss[split].append(epoch_loss[split])
                history_accuracy[split].append(epoch_accuracy[split])
            # Print info
            print(f"Epoch {epoch+1}:",
                  f"TrL={epoch_loss['train']:.4f},",
                  f"TrA={epoch_accuracy['train']:.4f},",
                  f"VL={epoch_loss['val']:.4f},",
                  f"VA={epoch_accuracy['val']:.4f},",
                  f"TeL={epoch_loss['test']:.4f},",
                  f"TeA={epoch_accuracy['test']:.4f},")
    except KeyboardInterrupt:
        print("Interrupted")
    finally:
        # Plot loss
        plt.title("Loss")
        for split in ["train", "val", "test"]:
            plt.plot(history_loss[split], label=split)
        plt.legend()
        plt.show()
        # Plot accuracy
        plt.title("Accuracy")
        for split in ["train", "val", "test"]:
            plt.plot(history_accuracy[split], label=split)
        plt.legend()
        plt.show()

Error:

---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-113-96aba51d9c54> in <module>()
      1 # Train model
----> 2 train2(modelcombo, (loaders, loaders_cifar), optimizer, criterion, epochs=3, dev=dev)

3 frames

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy_with_logits(input, target, weight, size_average, reduce, reduction, pos_weight)
   2824         reduction_enum = _Reduction.get_enum(reduction)
   2825 
-> 2826     if not (target.size() == input.size()):
   2827         raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
   2828 

AttributeError: 'tuple' object has no attribute 'size'

Well,
AttributeError: 'tuple' object has no attribute 'size' this is because BCE does not take tuple as arguments, but you’re passing labels = labels_mnist, labels_cifar. You might want to treat them as different pairs (MNIST and CIFAR) in the same batch (reduction would also take the job to apply the reduction. That would of course mean you had to do the necessary changes on the model output.
However, I’ve just seen that your concatenating the MNIST and CIFAR results at dimension 1 from the classifier. I’m not sure if that’s intentional. If not, then you could return them as a tuple, what would furthermore help you in the loss part.
However, may I ask why you want to process two different datasets in one forward ‘in parallel’?.

Regards,
Unity05

However, I’ve just seen that your concatenating the MNIST and CIFAR results at dimension 1 from the classifier. I’m not sure if that’s intentional.

Yes, it is intentional. I want a multi input network. I am doing an experiment of meta-transfer learning, in which I sum weights of 2 networks basically. And now i am trying to have a multi input network trained on 2 datasets.
You are right, I am defining a new loss function for this experiment, I will not use BCEWithLogitLoss
So, do you suggest to modify something in the net, or I can work on my custom loss function?
My CustomLoss will be this: CrossEntropy(MNIST) + CrossEntropy(CIFAR10) + CrossEntropy([MNIST+CIFAR10]-MNIST) (this last term of loss should guarantee that the network on MNIST have similar performances before and after the operation of sum.
Tomorrow I will try to implement something like this, and probably I will ask for help here :smiley:
Edit:
@Unity05 do you think a solution like this could be good?

class CustomLossFunction(nn.Module):
    def __init__(self):
        super(CustomLossFunction, self).__init__()
    def forward(self, pred_mnist, pred_cifar, target_mnist, target_cifar):
        loss_mnist = nn.BCEWithLogitsLoss(pred_mnist, target_mnist)
        loss_cifar = nn.BCEWithLogitsLoss(pred_cifar, target_cifar)
        loss_mnist_cifar = nn.BCEWithLogitsLoss(pred_mnist_cifar - pred_mnist, target_mnist_cifar - target_mnist)
        loss_sum = loss_mnist + loss_cifar + loss_mnist_cifar
        return loss_sum