Mat1 and mat2 shapes cannot be multiplied (64x3201 and 64x64)

I need some help. I'm running into shape-mismatch errors with my code:

class NLPModel(nn.Module):
    def __init__(self, num_users, num_books):
        super(NLPModel, self).__init__()
        self.user_embedding = nn.Embedding(num_users+1, 32)
        self.book_embedding = nn.Embedding(num_books+1, 32)
        self.concatenation_layer = nn.Linear(64, 32*2)
        self.review_embedding = nn.Sequential(
            nn.Embedding(5000, 32),
            nn.Flatten()
        )
        #self.dot_product = nn.Sequential(
            #nn.Flatten(),
            #nn.Linear(32*2, 1)
        #)
        self.dot_product = nn.Sequential(
            nn.Linear(32*2, 32*2),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(32*2, 1)
        )
        self.concat = nn.Sequential(
            nn.Linear(32+32, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(32, 1)
        )
    
    def forward(self, user_input, book_input, review_input):
        user_embedded = self.user_embedding(user_input)
        book_embedded = self.book_embedding(book_input)
        concatenated = torch.cat([user_embedded, book_embedded], dim=1)
        concatenated = self.concatenation_layer(concatenated)  # add this line
        dot_product = self.dot_product(concatenated)
        review_embedded = self.review_embedding(review_input)
        concat = torch.cat([review_embedded, dot_product], dim=1)
        return self.concat(concat)

class NLPDataset(Dataset):
    def __init__(self, data, tokenizer, vocab, max_length):
        self.users = torch.tensor(data['customer_id'].values)
        self.books = torch.tensor(data['product_id'].values)
        self.ratings = torch.tensor(data['star_rating'].values)
        
        self.review_body_padded = []
        for review in data['review_body']:
            tokens = [vocab[token] if token in vocab else vocab['<unk>'] for token in tokenizer(review)]
            indices = torch.tensor(tokens, dtype=torch.long)
            if indices.shape[0] > max_length:
                indices = indices[:max_length]
            padded_indices = torch.cat([indices, torch.zeros(max_length - indices.shape[0], dtype=torch.long)])
            self.review_body_padded.append(padded_indices)
        
        self.review_body_padded = torch.stack(self.review_body_padded).long()
        self.review_body_padded[self.review_body_padded == 0] = 1  
        self.review_body_padded = torch.log(self.review_body_padded + 1)

        
        self.vocab_size = len(vocab)

    def __len__(self):
        return len(self.ratings)
    
    def __getitem__(self, idx):
        return self.users[idx], self.books[idx], self.ratings[idx], self.review_body_padded[idx]

tokenizer = get_tokenizer('basic_english')
texts = [text for text in df_nlp['review_body']]
vocab = build_vocab_from_iterator(map(tokenizer, texts), min_freq=1, specials=["<pad>", "<unk>"])
vocab.set_default_index(vocab["<unk>"])
vocab_size = len(vocab) 
max_length = 100
# set up data loaders
train_data = df_nlp.sample(frac=0.8)
test_data = df_nlp.drop(train_data.index)
train_dataset = NLPDataset(train_data, tokenizer, vocab, max_length)
test_dataset = NLPDataset(test_data, tokenizer, vocab, max_length)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
# set up model and optimizer
num_users = df_nlp['customer_id'].nunique()
num_books = df_nlp['product_id'].nunique()
model_nlp = NLPModel(num_users, num_books)
optimizer = optim.Adam(model_nlp.parameters(), lr=0.01)
criterion = nn.MSELoss()
num_epochs = 10

for epoch in range(num_epochs):
    train_loss = 0.0
    for user, book, rating, review in train_loader:
      optimizer.zero_grad()
      output = model_nlp(user.long(), book.long(), review.long().reshape(-1, 100))
      loss = criterion(output.squeeze(), rating.float())
      loss.backward()
      optimizer.step()
      train_loss += loss.item() * user.size(0)

    train_loss /= len(train_loader.dataset)
    print('Epoch: {}, Training Loss: {:.4f}'.format(epoch+1, train_loss))

and I keep getting this error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-110-3bc9b0fc6272> in <module>
      5     for user, book, rating, review in train_loader:
      6       optimizer.zero_grad()
----> 7       output = model_nlp(user.long(), book.long(), review.long().reshape(-1, 100))
      8       loss = criterion(output.squeeze(), rating.float())
      9       loss.backward()

5 frames
/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1192         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194             return forward_call(*input, **kwargs)
   1195         # Do not call functions when jit is used
   1196         full_backward_hooks, non_full_backward_hooks = [], []

<ipython-input-105-ab06f5dda1a4> in forward(self, user_input, book_input, review_input)
     37        review_embedded = self.review_embedding(review_input)
     38        concat = torch.cat([review_embedded, dot_product], dim=1)
---> 39        return self.concat(concat)

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1192         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194             return forward_call(*input, **kwargs)
   1195         # Do not call functions when jit is used
   1196         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/container.py in forward(self, input)
    202     def forward(self, input):
    203         for module in self:
--> 204             input = module(input)
    205         return input
    206 

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1192         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194             return forward_call(*input, **kwargs)
   1195         # Do not call functions when jit is used
   1196         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    112 
    113     def forward(self, input: Tensor) -> Tensor:
--> 114         return F.linear(input, self.weight, self.bias)
    115 
    116     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x3201 and 64x64)

I have also used this code to inspect the shapes and first samples of a batch:

# print the shapes of the tensors
print("user shape:", user.shape)
print("book shape:", book.shape)
print("review shape:", review.shape)
    
# print the first sample in each tensor
print("user:", user[0])
print("book:", book[0])
print("review:", review[0])

and got these results:

user shape: torch.Size([64])
book shape: torch.Size([64])
review shape: torch.Size([64, 100])
user: tensor(15314, dtype=torch.int16)
book: tensor(11322, dtype=torch.int16)
review: tensor([ 9.5303,  3.8286,  5.6312,  2.7081,  5.3936,  3.7612,  1.3863, 11.3508,
         4.5951,  4.4188,  1.6094,  3.2581,  1.3863,  9.2828,  2.3979,  2.9444,
         3.1355,  5.4072,  2.0794,  3.4012,  4.5747,  5.0370,  1.0986,  9.5303,
         3.8286,  5.6312,  2.1972,  4.4188,  5.6240,  1.9459,  8.7178,  1.3863,
         8.5804,  1.9459,  2.8332,  6.9354,  1.6094,  3.2581,  8.0583,  3.8286,
         7.4782,  8.1654,  3.0445,  2.1972,  4.0775,  6.2285,  1.6094,  1.7918,
         7.5893,  5.6525,  2.0794,  4.7875,  2.6391,  6.4441,  2.0794,  1.3863,
         6.8987,  1.0986,  1.7918,  4.7958,  2.0794,  4.7875,  2.7081,  4.5747,
         5.0370,  1.6094,  8.1654,  7.5191,  7.0229,  5.7301,  1.3863,  4.6347,
         3.7377,  7.8675,  5.1930,  1.9459,  1.3863,  5.7838,  1.6094,  1.7918,
         8.5860,  2.8332,  8.0968,  2.3026,  5.5947,  5.3566,  3.8067,  4.7791,
         9.6541,  1.0986,  1.3863,  2.7726,  2.3026,  6.1181,  1.6094,  6.8448,
         1.6094,  7.1420,  1.7918,  4.2485])

Based on the error message and the traceback, the first linear layer in self.concat raises the error: it expects 64 input features while the incoming activation contains 3201. That 3201 comes from the flattened review embedding (100 tokens * 32 dims = 3200 features) concatenated with the 1-dim output of self.dot_product.
You could either change the in_features value of that linear layer to 3201 or make sure the activation contains only 64 features.
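For reference, here is a minimal standalone sketch of the first option, using the shapes from your code (batch size 64, max_length = 100, embedding size 32), so the first layer of self.concat takes 100 * 32 + 1 = 3201 in_features:

import torch
import torch.nn as nn

batch, max_length, emb_dim = 64, 100, 32

# Shapes as they arrive at self.concat in your forward pass:
review_embedded = torch.randn(batch, max_length * emb_dim)  # Flatten of [64, 100, 32] -> [64, 3200]
dot_product = torch.randn(batch, 1)                         # output of self.dot_product -> [64, 1]
concat = torch.cat([review_embedded, dot_product], dim=1)   # [64, 3201]

# self.concat with its first layer widened to accept all 3201 features:
head = nn.Sequential(
    nn.Linear(max_length * emb_dim + 1, 64),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(64, 32),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(32, 1),
)
print(head(concat).shape)  # torch.Size([64, 1])

The second option would be to pool the review embedding over the 100 token positions (e.g. a mean) instead of flattening it, so it contributes 32 features rather than 3200.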

Now I get this error: mat1 and mat2 shapes cannot be multiplied (64x64 and 32x1).
I have updated the code like this:

class NLPModel(nn.Module):
    def __init__(self, num_users, num_books):
        super(NLPModel, self).__init__()
        self.user_embedding = nn.Embedding(num_users+1, 32)
        self.book_embedding = nn.Embedding(num_books+1, 32)
        self.concatenation_layer = nn.Linear(64, 32*2)
        self.review_embedding = nn.Sequential(
            nn.Embedding(5000, 32),
            nn.Flatten()
        )
 
        self.dot_product = nn.Sequential(
            nn.Linear(64, 32*2),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(32, 1)
        )
        self.concat = nn.Sequential(
            nn.Linear(32+32, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(32, 1)
        )
    
    def forward(self, user_input, book_input, review_input):
        user_embedded = self.user_embedding(user_input)
        book_embedded = self.book_embedding(book_input)
        concatenated = torch.cat([user_embedded, book_embedded], dim=1)
        concatenated = self.concatenation_layer(concatenated)  
        dot_product = self.dot_product(concatenated)
        review_embedded = self.review_embedding(review_input)
        concat = torch.cat([review_embedded, dot_product], dim=1)
        return self.concat(concat)

class NLPDataset(Dataset):
    def __init__(self, data, tokenizer, vocab, max_length):
        self.users = torch.tensor(data['customer_id'].values)
        self.books = torch.tensor(data['product_id'].values)
        self.ratings = torch.tensor(data['star_rating'].values)
        
        self.review_body_padded = []
        for review in data['review_body']:
            tokens = [vocab[token] if token in vocab else vocab['<unk>'] for token in tokenizer(review)]
            indices = torch.tensor(tokens, dtype=torch.long)
            if indices.shape[0] > max_length:
                indices = indices[:max_length]
            padded_indices = torch.cat([indices, torch.zeros(max_length - indices.shape[0], dtype=torch.long)], dim=0)
            self.review_body_padded.append(padded_indices)
        
        self.review_body_padded = torch.stack(self.review_body_padded).long()
        self.review_body_padded = torch.log(self.review_body_padded + 1)
        
        self.vocab_size = len(vocab)

    def __len__(self):
        return len(self.ratings)
    
    def __getitem__(self, idx):
        return self.users[idx], self.books[idx], self.ratings[idx], self.review_body_padded[idx]

tokenizer = get_tokenizer('basic_english')
texts = [text for text in df_nlp['review_body']]
vocab = build_vocab_from_iterator(map(tokenizer, texts), min_freq=1, specials=["<pad>", "<unk>"])
vocab.set_default_index(vocab["<unk>"])
vocab_size = len(vocab) 
max_length = 100
# set up data loaders
train_data = df_nlp.sample(frac=0.8)
test_data = df_nlp.drop(train_data.index)
train_dataset = NLPDataset(train_data, tokenizer, vocab, max_length)
test_dataset = NLPDataset(test_data, tokenizer, vocab, max_length)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
# set up model and optimizer
num_users = df_nlp['customer_id'].nunique()
num_books = df_nlp['product_id'].nunique()
model_nlp = NLPModel(num_users, num_books)
optimizer = optim.Adam(model_nlp.parameters(), lr=0.01)
criterion = nn.MSELoss()
num_epochs = 10

for epoch in range(num_epochs):
    train_loss = 0.0
    for user, book, rating, review in train_loader:
      optimizer.zero_grad()
      output = model_nlp(user.long(), book.long(), review.long())
      loss = criterion(output.squeeze(), rating.float())
      loss.backward()
      optimizer.step()
      train_loss += loss.item() * user.size(0)

    train_loss /= len(train_loader.dataset)
    print('Epoch: {}, Training Loss: {:.4f}'.format(epoch+1, train_loss))

and got this error:

RuntimeError                              Traceback (most recent call last)
<ipython-input-191-5581c569afb2> in <module>
      5     for user, book, rating, review in train_loader:
      6       optimizer.zero_grad()
----> 7       output = model_nlp(user.long(), book.long(), review.long())
      8       loss = criterion(output.squeeze(), rating.float())
      9       loss.backward()

5 frames
/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1192         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194             return forward_call(*input, **kwargs)
   1195         # Do not call functions when jit is used
   1196         full_backward_hooks, non_full_backward_hooks = [], []

<ipython-input-185-2f5cb88e83fc> in forward(self, user_input, book_input, review_input)
     31         concatenated = torch.cat([user_embedded, book_embedded], dim=1)
     32         concatenated = self.concatenation_layer(concatenated)
---> 33         dot_product = self.dot_product(concatenated)
     34         review_embedded = self.review_embedding(review_input)
     35         concat = torch.cat([review_embedded, dot_product], dim=1)

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1192         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194             return forward_call(*input, **kwargs)
   1195         # Do not call functions when jit is used
   1196         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/container.py in forward(self, input)
    202     def forward(self, input):
    203         for module in self:
--> 204             input = module(input)
    205         return input
    206 

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1192         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194             return forward_call(*input, **kwargs)
   1195         # Do not call functions when jit is used
   1196         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    112 
    113     def forward(self, input: Tensor) -> Tensor:
--> 114         return F.linear(input, self.weight, self.bias)
    115 
    116     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x64 and 32x1)

Note: this is my first NLP project with PyTorch.

Kindly check the dimensions of the matrices: to multiply an m x n matrix by an a x b matrix, n must equal a (n == a). In your updated self.dot_product, the first linear layer outputs 64 features but the final nn.Linear(32, 1) expects 32 inputs, which is exactly the (64x64 and 32x1) mismatch in the error.
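To make this concrete, here is a minimal sketch contrasting the current self.dot_product with a consistently chained version (keeping the hidden width at 64 is an assumption; nn.Linear(64, 32) followed by nn.Linear(32, 1) would work just as well):

import torch
import torch.nn as nn

x = torch.randn(64, 64)  # output of self.concatenation_layer: [batch, 64]

# Broken: the first layer emits 64 features, but the last layer expects 32
broken = nn.Sequential(nn.Linear(64, 64), nn.ReLU(), nn.Dropout(0.5), nn.Linear(32, 1))
# broken(x)  # RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x64 and 32x1)

# Consistent: each in_features matches the previous layer's out_features
fixed = nn.Sequential(nn.Linear(64, 64), nn.ReLU(), nn.Dropout(0.5), nn.Linear(64, 1))
print(fixed(x).shape)  # torch.Size([64, 1])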

This is now the new error: ValueError: Using a target size (torch.Size([64])) that is different to the input size (torch.Size([64, 3264])) is deprecated. Please ensure they have the same size.
I have updated the code like this:

class NLPModel(nn.Module):
    def __init__(self, num_users, num_books):
        super(NLPModel, self).__init__()
        self.user_embedding = nn.Embedding(num_users+1, 32)
        self.book_embedding = nn.Embedding(num_books+1, 32)
        self.concatenation_layer = nn.Linear(64, 64)
        self.review_embedding = nn.Sequential(
            nn.Embedding(5000, 32),
            nn.Flatten()
        )

        self.dot_product = nn.Sequential(
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 64)
        )
        self.concat = nn.Sequential(
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 64)
        )
    
    def forward(self, user_input, book_input, review_input):
        user_embedded = self.user_embedding(user_input)
        book_embedded = self.book_embedding(book_input)
        concatenated = torch.cat([user_embedded, book_embedded], dim=1)
        concatenated = self.concatenation_layer(concatenated)  
        dot_product = self.dot_product(concatenated)
        review_embedded = self.review_embedding(review_input)
        concat = torch.cat([review_embedded, dot_product], dim=1)
        return concat

class NLPDataset(Dataset):
    def __init__(self, data, tokenizer, vocab, max_length):
        self.users = torch.tensor(data['customer_id'].values)
        self.books = torch.tensor(data['product_id'].values)
        self.ratings = torch.tensor(data['star_rating'].values)
        
        self.review_body_padded = []
        for review in data['review_body']:
            tokens = [vocab[token] if token in vocab else vocab['<unk>'] for token in tokenizer(review)]
            indices = torch.tensor(tokens, dtype=torch.long)
            if indices.shape[0] > max_length:
                indices = indices[:max_length]
            padded_indices = torch.cat([indices, torch.zeros(max_length - indices.shape[0], dtype=torch.long)], dim=0)
            self.review_body_padded.append(padded_indices)
        
        self.review_body_padded = torch.stack(self.review_body_padded).long()
        self.review_body_padded = torch.log(self.review_body_padded + 1)
        
        self.vocab_size = len(vocab)

    def __len__(self):
        return len(self.ratings)
    
    def __getitem__(self, idx):
        return self.users[idx], self.books[idx], self.ratings[idx], self.review_body_padded[idx]

tokenizer = get_tokenizer('basic_english')
texts = [text for text in df_nlp['review_body']]
vocab = build_vocab_from_iterator(map(tokenizer, texts), min_freq=1, specials=["<pad>", "<unk>"])
vocab.set_default_index(vocab["<unk>"])
vocab_size = len(vocab) 
max_length = 100
# set up data loaders
train_data = df_nlp.sample(frac=0.8)
test_data = df_nlp.drop(train_data.index)
train_dataset = NLPDataset(train_data, tokenizer, vocab, max_length)
test_dataset = NLPDataset(test_data, tokenizer, vocab, max_length)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
# set up model and optimizer
num_users = df_nlp['customer_id'].nunique()
num_books = df_nlp['product_id'].nunique()
model_nlp = NLPModel(num_users, num_books)
optimizer = optim.Adam(model_nlp.parameters(), lr=0.01)
criterion = nn.BCELoss()
num_epochs = 10

for epoch in range(num_epochs):
    train_loss = 0.0
    for user, book, rating, review in train_loader:
      optimizer.zero_grad()
      output = model_nlp(user.long(), book.long(), review.long())
      loss = criterion(output, rating.long())
      loss.backward()
      optimizer.step()
      train_loss += loss.item() * user.size(0)

    train_loss /= len(train_loader.dataset)
    print('Epoch: {}, Training Loss: {:.4f}'.format(epoch+1, train_loss))

and this is the new error:

ValueError                                Traceback (most recent call last)
<ipython-input-137-f80539a6f64a> in <module>
      6       optimizer.zero_grad()
      7       output = model_nlp(user.long(), book.long(), review.long())
----> 8       loss = criterion(output, rating.long())
      9       loss.backward()
     10       optimizer.step()

2 frames
/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1192         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194             return forward_call(*input, **kwargs)
   1195         # Do not call functions when jit is used
   1196         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
    617 
    618     def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 619         return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
    620 
    621 

/usr/local/lib/python3.9/dist-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
   3084         reduction_enum = _Reduction.get_enum(reduction)
   3085     if target.size() != input.size():
-> 3086         raise ValueError(
   3087             "Using a target size ({}) that is different to the input size ({}) is deprecated. "
   3088             "Please ensure they have the same size.".format(target.size(), input.size())

ValueError: Using a target size (torch.Size([64])) that is different to the input size (torch.Size([64, 3264])) is deprecated. Please ensure they have the same size.

I think I need to add a flatten layer, but I don't know where to add it.

In line 619, return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction), the dimensions of the input and target arguments passed to forward do not match. You cannot compute the cross-entropy between, let's say, 10 values and 20 values; that is what the error is indicating. Your forward now returns the raw concatenation of shape [64, 3264] (3200 flattened review features plus the 64 features from dot_product), while the target rating has shape [64], one value per sample.
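As a shape-level sketch only (the 3264-wide head, the squeeze, and the switch back to the MSELoss you used earlier are assumptions about the intended fix, not a full review of the model):

import torch
import torch.nn as nn

batch = 64
concat = torch.randn(batch, 3264)               # current model output: 3200 + 64 features
rating = torch.randint(1, 6, (batch,)).float()  # star ratings as float targets: [64]

# Reduce the concatenated features to one prediction per sample, then squeeze
# so the output shape [64] matches the target shape [64].
head = nn.Linear(3264, 1)
output = head(concat).squeeze(1)                # [64]

# Note: BCELoss expects probabilities in [0, 1] and float targets; for 1-5
# star ratings a regression loss such as MSELoss is the usual choice.
loss = nn.MSELoss()(output, rating)
print(loss.item())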

OK, how do I fix this problem in my code? If you have time, you can check it in Colab:
https://colab.research.google.com/drive/1BH-vsOyBMrQPWslQThuKmcHOi2ffJ6rs?usp=sharing