RuntimeError: mat1 and mat2 shapes cannot be multiplied (50x1280 and 2048x6)

class Classifier(pl.LightningModule):

  def __init__(self):
      super().__init__()

      self.MFB = MFB(512,768,True,192,64,0.1)
      self.fin_y_shape = torch.nn.Linear(768,512)
      self.fin_old = torch.nn.Linear(2048,2)
      self.fin_e = nn.Linear(16 * 768, 64)  # Adjusted input size to match the reshaped output from MFB
      self.fin_inten = torch.nn.Linear(2048,6)
      self.fin_e1 = torch.nn.Linear(2048,2)
      self.fin_e2 = torch.nn.Linear(2048,2)
      self.fin_e3 = torch.nn.Linear(2048,2)
      self.fin_e4 = torch.nn.Linear(2048,2)
      self.fin_e5 = torch.nn.Linear(2048,2)
      self.fin_e6 = torch.nn.Linear(2048,2)
      self.fin_e7 = torch.nn.Linear(2048,2)
      self.fin_e8 = torch.nn.Linear(2048,2)
      self.fin_e9 = torch.nn.Linear(2048,2)
      self.validation_step_outputs = []
      self.test_step_outputs = []

  def forward(self,x,y,rag):
      x_,y_,rag_ = x,y,rag
      z= torch.cat((x, y), dim=1)
      z_new = torch.squeeze(z, dim=1)
      c_inten = self.fin_inten(z_new)
      c_e1 = self.fin_e1(z_new)
      c_e2 = self.fin_e2(z_new)
      c_e3 = self.fin_e3(z_new)
      c_e4 = self.fin_e4(z_new)
      c_e5 = self.fin_e5(z_new)
      c_e6 = self.fin_e6(z_new)
      c_e7 = self.fin_e7(z_new)
      c_e8 = self.fin_e8(z_new)
      c_e9 = self.fin_e9(z_new)
      c = self.fin_old(z_new)

      # probability distribution over labels
      c = torch.log_softmax(c, dim=1)
      c_inten = torch.log_softmax(c_inten, dim=1)
      c_e1 = torch.log_softmax(c_e1, dim=1)
      c_e2 = torch.log_softmax(c_e2, dim=1)
      c_e3 = torch.log_softmax(c_e3, dim=1)
      c_e4 = torch.log_softmax(c_e4, dim=1)
      c_e5 = torch.log_softmax(c_e5, dim=1)
      c_e6 = torch.log_softmax(c_e6, dim=1)
      c_e7 = torch.log_softmax(c_e7, dim=1)
      c_e8 = torch.log_softmax(c_e8, dim=1)
      c_e9 = torch.log_softmax(c_e9, dim=1)

      return z,c,c_e1,c_e2,c_e3,c_e4,c_e5,c_e6,c_e7,c_e8,c_e9,c_inten

  def cross_entropy_loss(self, logits, labels):
    return F.nll_loss(logits, labels)


  def training_step(self, train_batch, batch_idx):
      lab,txt,rag,img,name,intensity,e1,e2,e3,e4,e5,e6,e7,e8,e9,t1,t2,t3,t4,t5,t6,t7 = train_batch
      lab = train_batch[lab]
      txt = train_batch[txt]
      rag = train_batch[rag]
      img = train_batch[img]
      name= train_batch[name]
      intensity = train_batch[intensity]
      e1 = train_batch[e1]
      e2 = train_batch[e2]
      e3 = train_batch[e3]
      e4 = train_batch[e4]
      e5 = train_batch[e5]
      e6 = train_batch[e6]
      e7 = train_batch[e7]
      e8 = train_batch[e8]
      e9 = train_batch[e9]
      t1,t2,t3,t4,t5,t6,t7 = torch.unsqueeze(batch[t1],1),torch.unsqueeze(batch[t2],1),\
      torch.unsqueeze(batch[t3],1),torch.unsqueeze(batch[t4],1),torch.unsqueeze(batch[t5],1),\
      torch.unsqueeze(batch[t6],1),torch.unsqueeze(batch[t7],1)
      gt_target = torch.cat((t1,t2,t3,t4,t5,t6),1)
      gt_emotion = torch.cat((torch.unsqueeze(e1,1),torch.unsqueeze(e2,1),torch.unsqueeze(e3,1),torch.unsqueeze(e4,1),torch.unsqueeze(e5,1),torch.unsqueeze(e6,1),\
                              torch.unsqueeze(e7,1),torch.unsqueeze(e8,1),torch.unsqueeze(e9,1)),1)

      z,logit_offen, a,b,c,d,e,f,g,h,i,j,k,l,m,logitinten_target,logit_sarcasm,logit_emotion= self.forward(txt,img,rag)

      loss1 = self.cross_entropy_loss(logit_offen, lab)
      #loss2 = self.cross_entropy_loss(logit_arou, arou)
      #loss3 = self.cross_entropy_loss(logit_val, val)
      loss4 = self.cross_entropy_loss(a, e1)
      loss5 = self.cross_entropy_loss(b, e2)
      loss6 = self.cross_entropy_loss(c, e3)
      loss7 = self.cross_entropy_loss(d, e4)
      loss8 = self.cross_entropy_loss(e, e5)
      loss9 = self.cross_entropy_loss(f, e6)

      loss17 = self.cross_entropy_loss(inten, intensity)

      loss18 = F.binary_cross_entropy_with_logits(logit_target.float(), gt_target.float())
      loss_emo_mult = F.binary_cross_entropy_with_logits(logit_emotion.float(), gt_emotion.float())


      #loss = loss1+loss_emo_mult+loss17
      loss=loss1+loss_emo_mult
      self.log('train_loss', loss)

      return loss


  def validation_step(self, val_batch, batch_idx):
      lab,txt,rag,img,name,e1,e2,e3,e4,e5,e6,e7,e8,e9,intensity,t1,t2,t3,t4,t5,t6 = val_batch

      #print(val_batch)
      lab = val_batch[lab]
      txt = val_batch[txt]
      img = val_batch[img]

      e1 = val_batch[e1]
      e2 = val_batch[e2]
      e3 = val_batch[e3]
      e4 = val_batch[e4]
      e5 = val_batch[e5]
      e6 = val_batch[e6]
      e7 = val_batch[e7]
      e8 = val_batch[e8]
      e9 = val_batch[e9]


      intensity = val_batch[intensity]
      t1,t2,t3,t4,t5,t6 = torch.unsqueeze(val_batch[t1],1),torch.unsqueeze(val_batch[t2],1),\
      torch.unsqueeze(val_batch[t3],1),torch.unsqueeze(val_batch[t4],1),torch.unsqueeze(val_batch[t5],1),\
      torch.unsqueeze(val_batch[t6],1)
      #print(t1.size())
      gt_target = torch.cat((t1,t2,t3,t4,t5,t6),1) #ground truth target
      gt_emotion = torch.cat((torch.unsqueeze(e1,1),torch.unsqueeze(e2,1),torch.unsqueeze(e3,1),torch.unsqueeze(e4,1),torch.unsqueeze(e5,1),torch.unsqueeze(e6,1),\
                              torch.unsqueeze(e7,1),torch.unsqueeze(e8,1),torch.unsqueeze(e9,1)),1)

      logits, a,b,c,d,e,f,g,h,i,j,k,l,m,inten,logit_target,logit_emotion = self.forward(txt,img,rag)





      tmp = np.argmax(logits.detach().cpu().numpy(),axis=-1)
      loss = self.cross_entropy_loss(logits, lab)
      lab = lab.detach().cpu().numpy()
      self.log('val_acc', f1_score(lab,tmp,average='macro'))
      #self.log('val_roc_auc',roc_auc_score(lab,tmp))
      self.log('val_loss', loss)
      tqdm_dict = {'val_acc': accuracy_score(lab,tmp)}
      #print('Val acc {}'.format(accuracy_score(lab,tmp)))
      return {
                'progress_bar': tqdm_dict,
              'val_loss_target': F.binary_cross_entropy_with_logits(logit_target.float(), gt_target.float()),
              'val_loss_emotion_multilabel': F.binary_cross_entropy_with_logits(logit_emotion.float(), gt_emotion.float()),
              'val_acc e1': accuracy_score(e1.detach().cpu().numpy(),np.argmax(a.detach().cpu().numpy(),axis=-1)),
      'val_acc e2': accuracy_score(e2.detach().cpu().numpy(),np.argmax(b.detach().cpu().numpy(),axis=-1)),
      'val_acc e3': accuracy_score(e3.detach().cpu().numpy(),np.argmax(c.detach().cpu().numpy(),axis=-1)),
      'val_acc e4': accuracy_score(e4.detach().cpu().numpy(),np.argmax(d.detach().cpu().numpy(),axis=-1)),
      'val_acc e5': accuracy_score(e5.detach().cpu().numpy(),np.argmax(e.detach().cpu().numpy(),axis=-1)),
      'val_acc e6': accuracy_score(e6.detach().cpu().numpy(),np.argmax(f.detach().cpu().numpy(),axis=-1)),
      'val_acc e7': accuracy_score(e7.detach().cpu().numpy(),np.argmax(g.detach().cpu().numpy(),axis=-1)),
      'val_acc e8': accuracy_score(e8.detach().cpu().numpy(),np.argmax(h.detach().cpu().numpy(),axis=-1)),
      'val_acc e9': accuracy_score(e9.detach().cpu().numpy(),np.argmax(i.detach().cpu().numpy(),axis=-1)),
      'val_acc intensity': f1_score(intensity.detach().cpu().numpy(),np.argmax(inten.detach().cpu().numpy(),axis=-1),average='macro'),
      'f1 sarcasm': f1_score(sarcasm.detach().cpu().numpy(),np.argmax(logit_sarcasm.detach().cpu().numpy(),axis=-1),average='macro')
      }

  def on_validation_epoch_end(self, validation_step_outputs):
    outs = []
    outs1,outs2,outs3,outs4,outs5,outs6,outs7,outs8,outs9,outs10,outs11,outs12,outs13,outs14,outs16,outs17 = \
    [],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]
    outs15 = []
    outs18 = []
    for out in validation_step_outputs:
      outs.append(out['progress_bar']['val_acc'])
      outs1.append(out['val_acc e1'])
      outs2.append(out['val_acc e2'])
      outs3.append(out['val_acc e3'])
      outs4.append(out['val_acc e4'])
      outs5.append(out['val_acc e5'])
      outs6.append(out['val_acc e6'])
      outs7.append(out['val_acc e7'])
      outs8.append(out['val_acc e8'])
      outs9.append(out['val_acc e9'])

      outs14.append(out['val_acc intensity'])
      outs15.append(out['val_loss_target'])
      outs16.append(out['val_loss_emotion_multilabel'])
      outs17.append(out['val_acc sarcasm'])
      outs18.append(out['f1 sarcasm'])
    self.log('val_acc_all_offn', sum(outs)/len(outs))
    self.log('val_loss_target', sum(outs15)/len(outs15))
    self.log('val_acc_all e1', sum(outs1)/len(outs1))
    self.log('val_acc_all e2', sum(outs2)/len(outs2))
    self.log('val_acc_all e3', sum(outs3)/len(outs3))
    self.log('val_acc_all e4', sum(outs4)/len(outs4))
    self.log('val_acc_all e5', sum(outs5)/len(outs5))
    self.log('val_acc_all e6', sum(outs6)/len(outs6))
    self.log('val_acc_all e7', sum(outs7)/len(outs7))
    self.log('val_acc_all e8', sum(outs8)/len(outs8))
    self.log('val_acc_all e9', sum(outs9)/len(outs9))

    self.log('val_acc_all inten', sum(outs14)/len(outs14))
    self.log('val_loss_all emo', sum(outs16)/len(outs16))


    print(f'***offensive f1 at epoch end {sum(outs)/len(outs)}****')
    #print(f'***val acc inten at epoch end {sum(outs14)/len(outs14)}****')
    print(f'***val loss emotion at epoch end {sum(outs16)/len(outs16)}****')
    #print(f'***val acc sarcasm at epoch end {sum(outs17)/len(outs17)}****')
    #print(f'***val f1 sarcasm at epoch end {sum(outs18)/len(outs18)}****')

  def test_step(self, batch, batch_idx):
      name,lab,txt,img,val,arou,e1,e2,e3,e4,e5,e6,e7,e8,e9,intensity,t1,t2,t3,t4,t5,t6 = batch
      name = batch[name]
      lab = batch[lab]
      txt = batch[txt]
      img = batch[img]
      e1 = batch[e1]
      e2 = batch[e2]
      e3 = batch[e3]
      e4 = batch[e4]
      e5 = batch[e5]
      e6 = batch[e6]
      e7 = batch[e7]
      e8 = batch[e8]
      e9 = batch[e9]

      intensity = batch[intensity]

      t1,t2,t3,t4,t5,t6,t7 = torch.unsqueeze(batch[t1],1),torch.unsqueeze(batch[t2],1),\
      torch.unsqueeze(batch[t3],1),torch.unsqueeze(batch[t4],1),torch.unsqueeze(batch[t5],1),\
      torch.unsqueeze(batch[t6],1)
      gt_target = torch.cat((t1,t2,t3,t4,t5,t6,t7),1)
      gt_emotion = torch.cat((torch.unsqueeze(e1,1),torch.unsqueeze(e2,1),torch.unsqueeze(e3,1),torch.unsqueeze(e4,1),torch.unsqueeze(e5,1),torch.unsqueeze(e6,1),\
                              torch.unsqueeze(e7,1),torch.unsqueeze(e8,1),torch.unsqueeze(e9,1)),1)

      logits, a,b,c,d,e,f,g,h,i,j,k,l,m,inten,logit_target,logit_emotion = self.forward(txt,img,rag)
      #self.log('val_acc 1', accuracy_score(lab.detach().cpu().numpy(),np.argmax(logits.detach().cpu().numpy(),axis=-1)))
      for n in name:
        N.append(n)
      append_gt(lab,o_t); append_gt(e1,e1_t); append_gt(e2,e2_t); append_gt(e3,e3_t); append_gt(e4,e4_t); append_gt(e5,e5_t);\
      append_gt(e6,e6_t); append_gt(e7,e7_t); append_gt(e8,e8_t); append_gt(e9,e9_t); append_gt(intensity,i_t);

      append_p(logits,o_p); append_p(a,e1_p); append_p(b,e2_p); append_p(c,e3_p); append_p(d,e4_p); append_p(e,e5_p);\
      append_p(f,e6_p); append_p(g,e7_p); append_p(h,e8_p); append_p(i,e9_p); append_p(inten,i_p);

      tmp = np.argmax(logits.detach().cpu().numpy(),axis=-1)
      loss = self.cross_entropy_loss(logits, lab)
      lab = lab.detach().cpu().numpy()
      self.log('test_acc', accuracy_score(lab,tmp))
      self.log('test f1',f1_score(sarcasm.detach().cpu().numpy(),np.argmax(logit_sarcasm.detach().cpu().numpy(),axis=-1),average='macro'))
      np.save('multitask_logit_emotion.npy',logit_emotion.detach().cpu().numpy())
      np.save('multitask_logit_offensive.npy',lab)
      np.save('multitask_logit_intensity.npy',inten.detach().cpu().numpy())
      #self.log('test confusion matrix',confusion_matrix(lab,tmp))
      #print(f'confusion matrix {confusion_matrix(sarcasm.detach().cpu().numpy(),np.argmax(logit_sarcasm.detach().cpu().numpy(),axis=-1))}')
      print(f'confusion matrix intensity {confusion_matrix(intensity.detach().cpu().numpy(),np.argmax(inten.detach().cpu().numpy(),axis=-1))}')
      print(f'confusion matrix offensive {confusion_matrix(lab,tmp)}')

      #self.log('test_roc_auc',roc_auc_score(sarcasm.detach().cpu().numpy(),np.argmax(logit_sarcasm.detach().cpu().numpy(),axis=-1)))
      #self.log('F1',f1_score(sarcasm.detach().cpu().numpy(),np.argmax(logit_sarcasm.detach().cpu().numpy(),axis=-1)))
      #self.log('recall',recall_score(sarcasm.detach().cpu().numpy(),np.argmax(logit_sarcasm.detach().cpu().numpy(),axis=-1)))
      #self.log('precision',precision_score(sarcasm.detach().cpu().numpy(),np.argmax(logit_sarcasm.detach().cpu().numpy(),axis=-1)))
      best_threshold = np.array([0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5])
      y_test = torch.nn.Sigmoid()(logit_emotion)
      y_test = y_test.detach().cpu().numpy()
      y_pred = np.array([[1 if y_test[i][j]>=best_threshold[j] else 0 for j in range(13)] for i in range(len(y_test))])
      #print(y_pred)
      total_correctly_predicted = len([i for i in range(len(y_test)) if (y_test[i]==y_pred[i]).sum() == 13])
      self.log('test_loss', loss)
      #print(total_correctly_predicted)
      pred_e = y_test
      return {'test_loss': loss,
              'test_loss_target': F.binary_cross_entropy_with_logits(logit_target.float(), gt_target.float()),
              'test_loss_emotion_multilabel': F.binary_cross_entropy_with_logits(logit_emotion.float(), gt_emotion.float()),
               'test_acc':f1_score(lab,tmp,average='macro'),
              'test_acc e1': accuracy_score(e1.detach().cpu().numpy(),np.argmax(a.detach().cpu().numpy(),axis=-1)),
              'test_acc e2': accuracy_score(e2.detach().cpu().numpy(),np.argmax(b.detach().cpu().numpy(),axis=-1)),
              'test_acc e3': accuracy_score(e3.detach().cpu().numpy(),np.argmax(c.detach().cpu().numpy(),axis=-1)),
              'test_acc e4': accuracy_score(e4.detach().cpu().numpy(),np.argmax(d.detach().cpu().numpy(),axis=-1)),
              'test_acc e5': accuracy_score(e5.detach().cpu().numpy(),np.argmax(e.detach().cpu().numpy(),axis=-1)),
              'test_acc e6': accuracy_score(e6.detach().cpu().numpy(),np.argmax(f.detach().cpu().numpy(),axis=-1)),
              'test_acc e7': accuracy_score(e7.detach().cpu().numpy(),np.argmax(g.detach().cpu().numpy(),axis=-1)),
              'test_acc e8': accuracy_score(e8.detach().cpu().numpy(),np.argmax(h.detach().cpu().numpy(),axis=-1)),
              'test_acc e9': accuracy_score(e9.detach().cpu().numpy(),np.argmax(i.detach().cpu().numpy(),axis=-1)),
              'test_acc inten': f1_score(intensity.detach().cpu().numpy(),np.argmax(inten.detach().cpu().numpy(),axis=-1),average='macro'),
              'test_acc sarcasm': accuracy_score(sarcasm.detach().cpu().numpy(),np.argmax(logit_sarcasm.detach().cpu().numpy(),axis=-1)),
              'f1 sarcasm': f1_score(sarcasm.detach().cpu().numpy(),np.argmax(logit_sarcasm.detach().cpu().numpy(),axis=-1),average='macro')}
  def test_epoch_end(self, outputs):
        # OPTIONAL
        outs = []
        outs1,outs2,outs3,outs4,outs5,outs6,outs7,outs8,outs9,outs10,outs11,outs12,outs13,outs14 = \
        [],[],[],[],[],[],[],[],[],[],[],[],[],[]
        outs15 = []
        outs16 = []
        outs17 = []
        outs18 = []
        for out in outputs:
          outs15.append(out['test_loss_target'])
          outs.append(out['test_acc'])
          outs1.append(out['test_acc e1'])
          outs2.append(out['test_acc e2'])
          outs3.append(out['test_acc e3'])
          outs4.append(out['test_acc e4'])
          outs5.append(out['test_acc e5'])
          outs6.append(out['test_acc e6'])
          outs7.append(out['test_acc e7'])
          outs8.append(out['test_acc e8'])
          outs9.append(out['test_acc e9'])

          outs14.append(out['test_acc inten'])
          outs16.append(out['test_acc sarcasm'])
          outs17.append(out['test_loss_emotion_multilabel'])


        #print(outs)
        self.log('final test f1', sum(outs)/len(outs))
        """
        self.log('test_acc_all e1', sum(outs1)/len(outs1))
        self.log('test_acc_all e2', sum(outs2)/len(outs2))
        self.log('test_acc_all e3', sum(outs3)/len(outs3))
        self.log('test_acc_all e4', sum(outs4)/len(outs4))
        self.log('test_acc_all e5', sum(outs5)/len(outs5))
        self.log('test_acc_all e6', sum(outs6)/len(outs6))
        self.log('test_acc_all e7', sum(outs7)/len(outs7))
        self.log('test_acc_all e8', sum(outs8)/len(outs8))
        self.log('test_acc_all e9', sum(outs9)/len(outs9))
        self.log('test_acc_all e10', sum(outs10)/len(outs10))
        self.log('test_acc_all e11', sum(outs11)/len(outs11))
        self.log('test_acc_all e12', sum(outs12)/len(outs12))
        self.log('test_acc_all e13', sum(outs13)/len(outs13))
        self.log('test_acc_all inten', sum(outs14)/len(outs14))
        self.log('test_loss_all target', sum(outs15)/len(outs15))
        self.log('test_acc_all sarcasm', sum(outs16)/len(outs16))
        """
        self.log('test_loss_all emo', sum(outs17)/len(outs17))
        #self.log('test_f1_all sarcasm', sum(outs18)/len(outs18))


  def configure_optimizers(self):
    optimizer = torch.optim.Adam(self.parameters(), lr=5e-3)
    return optimizer


class HmDataModule(pl.LightningDataModule):

  def setup(self, stage):



    self.hm_train = t_p
    self.hm_val = v_p
    self.hm_test = te_p


  def train_dataloader(self):
    return DataLoader(self.hm_train, batch_size=64)

  def val_dataloader(self):
    return DataLoader(self.hm_val, batch_size=64)

  def test_dataloader(self):
    return DataLoader(self.hm_test, batch_size=128)

data_module = HmDataModule()


checkpoint_callback = ModelCheckpoint(
     monitor='val_acc_all_offn',
     dirpath='noemo/ckpts/',
     filename='our-ds-ckpt-epoch{epoch:02d}-val_f1_all_offn{val_acc_all_offn:.2f}',
     auto_insert_metric_name=False,
     save_top_k=1,
    mode="max",
 )
all_callbacks = []
all_callbacks.append(checkpoint_callback)
# train
from pytorch_lightning import seed_everything
seed_everything(123, workers=True)
hm_model = Classifier()
gpus=1
#if torch.cuda.is_available():gpus=0
trainer = pl.Trainer(deterministic=True,max_epochs=60,precision=16,callbacks=all_callbacks)
trainer.fit(hm_model, data_module)
     

RuntimeError                              Traceback (most recent call last)
<ipython-input-89-89047a9bcf28> in <cell line: 384>()
    382 #if torch.cuda.is_available():gpus=0
    383 trainer = pl.Trainer(deterministic=True,max_epochs=60,precision=16,callbacks=all_callbacks)
--> 384 trainer.fit(hm_model, data_module)
    385 

14 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    112 
    113     def forward(self, input: Tensor) -> Tensor:
--> 114         return F.linear(input, self.weight, self.bias)
    115 
    116     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (50x1280 and 2048x6)

How we can Solve this Issue

Your code is not executable, so feel free to add the missing pieces or to isolate the issue further and post a new executable code snippet reproducing the issue.

With that being said, I would start with the parameter shape ([2048, 6]), which seems to point to self.fin_inten, so you might want to check the input activation shape to this layer first.