RuntimeError: size mismatch, m1: [16384 x 1], m2: [128 x 2]

I'm trying to build a model that takes a simple vector representation of a product review and outputs a classification (favorable or unfavorable). The code for my classifier and training loop is below:

Model class definition:

import torch
import torch.nn as nn

class Classifier(nn.Module):
    def __init__(self, initial_n_channels, n_classes, network_n_channels):
        super(Classifier, self).__init__()
        self.network = nn.Sequential(
            nn.Conv1d(in_channels=initial_n_channels,
                      out_channels=network_n_channels,
                      kernel_size=args["kernel_size"]),
            nn.ReLU(),
            nn.Conv1d(in_channels=network_n_channels,
                      out_channels=network_n_channels,
                      kernel_size=args["kernel_size"],
                      stride=args["stride"]),
            nn.ReLU(),
            nn.Conv1d(in_channels=network_n_channels,
                      out_channels=network_n_channels,
                      kernel_size=args["kernel_size"],
                      stride=args["stride"]),
            nn.ReLU(),
            nn.Conv1d(in_channels=network_n_channels,
                      out_channels=network_n_channels,
                      kernel_size=args["kernel_size"],
                      stride=args["stride"]),
            nn.ReLU()
        )
        self.fc = nn.Linear(network_n_channels, n_classes)
        
    def forward(self, x_in, apply_sigmoid=False):
        # diagnostics
        print("classifier diagnostics", "\n",
              "---------------------------------", "\n")
        print("classifier x_in size: ", x_in.size())
        print("classifier weight size: ", self.fc.weight.size())
        
        features = self.network(x_in)
        prediction_vector = self.fc(features)
        if apply_sigmoid:
            # sigmoid is applied elementwise, so it takes no dim argument
            prediction_vector = torch.sigmoid(prediction_vector)
        return prediction_vector.double()

Instantiation:

from torch.utils.data import DataLoader
import torch.optim as optim

# dataset and vectorizer
dataset = ReviewDataset.load_and_vectorize(args["review_csv"])
vectorizer = dataset.get_vectorizer()

# model
classifier = Classifier(initial_n_channels=len(vectorizer.review_vocab),
                        n_classes=len(vectorizer.rating_vocab),
                        network_n_channels=args["num_channels"]).double()

# loss and optimizer
loss_func = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(classifier.parameters(), lr=args["learning_rate"])
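
As a sanity check, a dummy batch can be traced through the conv stack layer by layer to see where the shapes stop lining up (a rough sketch; the 128 x 7882 input mirrors the batch diagnostics in the output further down):

# trace a dummy batch through each layer of the (already instantiated) network
with torch.no_grad():
    out = torch.randn(128, 7882, 1).double()  # [batch, channels, seq_len], as after unsqueeze(dim=2)
    for layer in classifier.network:
        out = layer(out)
        print(type(layer).__name__, tuple(out.shape))
# with kernel_size=1 and stride=1, Conv1d keeps the length: L_out = (L_in - 1) // 1 + 1 = L_in,
# so seq_len stays 1 and the final feature map is [128, 128, 1]; nn.Linear(128, 2)
# then sees 16384 rows of 1 feature each, which matches the m1: [16384 x 1] in the error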

Training loop:

for epoch_index in range(args["num_epochs"]):
    train_state["epoch_index"] = epoch_index
    
    # set up the batch generator, reset the running loss and
    # accuracy for this epoch, and set train mode on
    dataset.set_split("train")
    dataloader = DataLoader(dataset=dataset,
                            batch_size=args["batch_size"],
                            drop_last=args["drop_last"])
    running_loss = 0.0
    running_acc = 0.0
    classifier.train()
    
    for batch_index, batch_dict in enumerate(dataloader):
        # five-step training routine
        
        # diagnostic stats
        print("\n", "training loop diagnostics", "\n",
              "---------------------------------", "\n")
        print("batch tensor dimensions: ", batch_dict["x_data"].shape)
        print("labels: ", batch_dict["y_target"])
        
        # i. zero the gradients
        optimizer.zero_grad()
        
        # ii. compute the output
        y_pred = classifier(x_in=batch_dict["x_data"].unsqueeze(dim=2))
        
        # iii. compute the loss
        loss = loss_func(y_pred, batch_dict["y_target"].float())
        loss_batch = loss.item()
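        # incremental mean: after this update, running_loss equals the average
        # batch loss over the (batch_index + 1) batches seen so far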
        running_loss += (loss_batch - running_loss) / (batch_index + 1)
        
        # iv. use loss to produce gradients
        loss.backward()
        
        # v. use optimizer to take gradient step
        optimizer.step()
        
        # -----------------------------------
        # compute accuracy score
        acc_batch = compute_accuracy(y_pred, batch_dict["y_target"])
        running_acc += (acc_batch - running_acc) / (batch_index + 1)
        
    train_state["train_loss"].append(running_loss)
    train_state["train_acc"].append(running_acc)
    
    # iterate over validation dataset
    
    # set up batch generator, set loss and acc to
    # zero, and set eval mode on
    dataset.set_split("val")
    dataloader = DataLoader(dataset=dataset, batch_size=args["batch_size"])
    running_loss = 0.0
    running_acc = 0.0
    classifier.eval()
    
    for batch_index, batch_dict in enumerate(dataloader):
        # i. compute output
        y_pred = classifier(x_in=batch_dict["x_data"].unsqueeze(dim=2))
        
        # ii. compute loss
        loss = loss_func(y_pred, batch_dict["y_target"].float())
        loss_batch = loss.item()
        running_loss += (loss_batch - running_loss) / (batch_index + 1)
        
        # iii. compute accuracy
        acc_batch = compute_accuracy(y_pred, batch_dict["y_target"])
        running_acc += (acc_batch - running_acc) / (batch_index + 1)
        
    train_state["val_loss"].append(running_loss)
    train_state["val_acc"].append(running_acc)

Output:


training loop diagnostics 
 --------------------------------- 

batch tensor dimensions:  torch.Size([128, 7882])
labels:  tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])
classifier diagnostics 
 --------------------------------- 

classifier x_in size:  torch.Size([128, 7882, 1])
classifier weight size:  torch.Size([2, 128])

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-213-2327bb74133d> in <module>
     26 
     27         # ii. compute the output
---> 28         y_pred = classifier(x_in=batch_dict["x_data"].unsqueeze(dim=2))
     29 
     30         # iii. compute the loss

<ipython-input-209-c8b508905fa0> in forward(self, x_in, apply_sigmoid)
     45         print("classifier weight size: ", self.fc.weight.size())
     46         features = self.network(x_in)
---> 47         prediction_vector = self.fc(features)
     48         if apply_sigmoid:
     49             prediction_vector = torch.sigmoid(prediction_vector)

... blah blah ...

RuntimeError: size mismatch, m1: [16384 x 1], m2: [128 x 2] at ../aten/src/TH/generic/THTensorMath.cpp:752 

Selected parameters/hyperparameters:

args = {
    ... blah blah ...
    # Model Hyperparameters
    "num_channels": 128,
    "kernel_size": 1,
    "stride": 1,
    # Training Hyperparameters
    "batch_size": 128,
    "early_stopping_criteria": 5,
    "learning_rate": 0.001,
    "num_epochs": 100,
    "drop_last": True
}

How can I fix the dimensions of my batch tensors so that they’re of the proper size?

Could you print the shape of features before passing it to self.fc?
Usually you would flatten the output of a conv layer; as it stands, features should have the shape [batch_size, channels, seq_length].
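
Something along these lines could work here (just a sketch): since kernel_size=1 and stride=1 leave seq_length at 1, flattening everything after the batch dimension yields channels * seq_length = 128, which matches the in_features of self.fc:

def forward(self, x_in, apply_sigmoid=False):
    features = self.network(x_in)             # [batch_size, channels, seq_length]
    features = features.flatten(start_dim=1)  # -> [batch_size, channels * seq_length]
    prediction_vector = self.fc(features)
    if apply_sigmoid:
        prediction_vector = torch.sigmoid(prediction_vector)
    return prediction_vector.double()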

The output I got for features.size() is:

torch.Size([128, 128, 1])

What do you mean by seq_length? Should I take the length of the longest review and use that as seq_length for all vector representations of my data points?