RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x13056 and 153600x2048)

Hi Ken_Jovan,

Could you find a solution for your problem? I faced the same thing. If possible, could you share your experience?

I have encountered a similar problem with a multitask model for an audio event detection task with trackwise output:

from numpy.core.fromnumeric import shape
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.container import Sequential
from methods.utils.model_utilities_transfer import (Transfer_Cnn14, PositionalEncoding, init_layer) #( CustomCNN, OrthogonalConv2d,PositionalEncoding, DoubleCNN, init_layer)
import torchaudio
from methods.utils.transfer_doa import(Transfer_Cnn14_d)

class EINV2(nn.Module):
    def __init__(self, cfg, dataset):
        super().__init__()
        self.pe_enable = False  # True | False
        self.in_channels = 4
        self.in_channels_doa = 7
        freeze_base = False
        if cfg['data']['audio_feature'] == 'logmel&intensity':
            self.f_bins = cfg['data']['n_mels']
            # self.in_channels_doa = 7
            # self.in_channels_sed = 4

        self.downsample_ratio = 2 ** 2
        self.sed = nn.Sequential(
            Transfer_Cnn14(in_channels=4, classes_num=14, freeze_base=False),  # nn.AvgPool2d(kernel_size=(2, 2)
            nn.AvgPool2d(2, 2)
        )
        # self.sed = (Transfer_Cnn14(4, classes_num=14, freeze_base=False),
        #       nn.AvgPool2d(kernel_size=(2, 2))
        # )
        self.doa = nn.Sequential(
            Transfer_Cnn14_d(in_channels=7, classes_num=3, freeze_base=False),
            nn.AvgPool2d(2, 2)
        )

        self.pe = PositionalEncoding(pos_len=100, d_model=2048, pe_type='t', dropout=0.0)
        self.sed_trans_track1 = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=2048, nhead=8, dim_feedforward=1024, dropout=0.2), num_layers=2)
        self.sed_trans_track2 = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=2048, nhead=8, dim_feedforward=1024, dropout=0.2), num_layers=2)
        self.doa_trans_track1 = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=2048, nhead=8, dim_feedforward=1024, dropout=0.2), num_layers=2)
        self.doa_trans_track2 = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=2048, nhead=8, dim_feedforward=1024, dropout=0.2), num_layers=2)

        self.fc_sed_track1 = nn.Linear(1024, 14, bias=True)
        self.fc_sed_track2 = nn.Linear(1024, 14, bias=True)
        self.fc_doa_track1 = nn.Linear(1024, 3, bias=True)
        self.fc_doa_track2 = nn.Linear(1024, 3, bias=True)
        self.final_act_sed = nn.Sequential()  # nn.Sigmoid()
        self.final_act_doa = nn.Tanh()

        self.init_weight()

        for param in Transfer_Cnn14.parameters(self):
            param.requires_grad = False
        if freeze_base:
            # Freeze AudioSet pretrained layers
            for param in self.base.parameters():
                param.requires_grad = False

            self.init_weights()
        for param in Transfer_Cnn14_d.parameters(self):
            param.requires_grad = False
        if freeze_base:
            # Freeze AudioSet pretrained layers
            for param in self.base.parameters():
                param.requires_grad = False

            self.init_weights()

    # def init_weights(self):
    #     init_layer(self)  # .fc_transfer

    def load_from_pretrain(self, pretrained_checkpoint_path):
        checkpoint = torch.load('/mnt/raid/ni/WALE_SEdl/EIN-SELD/Cnn14_DecisionLevelMax_mAP=0.385.pth')  # pretrained_checkpoint_path
        self.base.load_state_dict(checkpoint['model'])  # model

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, data_length)
        """
        output_dict = self.base(input, mixup_lambda)
        embedding = output_dict['embedding']

    def init_weight(self):
        init_layer(self.fc_sed_track1)
        init_layer(self.fc_sed_track2)
        init_layer(self.fc_doa_track1)
        init_layer(self.fc_doa_track2)

    def forward(self, x):
        """
        x: waveform, (batch_size, num_channels, data_length)
        """
        x_sed = x[:, :4]  # 4
        x_doa = x
        # fc
        x_sed_1 = self.final_act_sed(self.fc_sed_track1(x_sed))  # x_sed
        x_sed_2 = self.final_act_sed(self.fc_sed_track2(x_sed))
        x_sed = torch.stack((x_sed_1, x_sed_2), 2)
        x_doa_1 = self.final_act_doa(self.fc_doa_track1(x_doa))
        x_doa_2 = self.final_act_doa(self.fc_doa_track2(x_doa))
        x_doa = torch.stack((x_doa_1, x_doa_2), 2)
        output = {
            'sed': x_sed,
            'doa': x_doa,
        }

        return output

self.fc_sed_track1(x_sed) fails as 1024 input features are expected while x_sed contains 256.
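As a minimal sketch (assuming the 256-feature activation is really what you want to classify), you could either set in_features to the actual size or let PyTorch infer it on the first forward pass:

    import torch
    import torch.nn as nn

    x_sed = torch.randn(64, 256)        # stand-in for the real activation; print(x_sed.shape) to confirm

    fc_sed_track1 = nn.Linear(256, 14)  # in_features must equal x_sed.size(-1)
    out = fc_sed_track1(x_sed)          # -> [64, 14]

    # alternatively, nn.LazyLinear infers in_features from the first input it sees
    lazy_fc = nn.LazyLinear(14)
    out = lazy_fc(x_sed)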

Thanks, but how can I resolve this issue? Please help. I have also observed that when the model is built, the number of parameters is 0. What is actually wrong? Is there something odd about my model?

I have also the same problem, can you please help me with this. Here is my code:

# Define the Vision Transformer model
class VisionTransformer(nn.Module):
    def __init__(self, num_classes):
        super(VisionTransformer, self).__init__()
        # Define your ViT model architecture here

    def forward(self, x):
        # Implement the forward pass of the ViT model here
        return x

# Define the Convolutional Neural Network model
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        # Define your CNN model architecture here

    def forward(self, x):
        # Implement the forward pass of the CNN model here
        return x

# Define the combined model
batch_size = 2
feature_size = 150528
out_features = 5  # Number of classes

# Define your model architecture
class CervicalCancerClassifier(nn.Module):
    def __init__(self):
        super(CervicalCancerClassifier, self).__init__()
        self.vit = VisionTransformer(num_classes=out_features)
        self.cnn = CNN(num_classes=out_features)
        self.fc = nn.Linear(feature_size, out_features)

    def forward(self, x):
        x = self.vit(x)
        x = self.cnn(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

here is the error:

You didn’t post the error message, but I assume you are seeing a shape mismatch, which would point to the linear layer. Check the activation shape as well as the number of features the linear layer expects, and adapt the latter.
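For example, a debugging sketch of your posted forward (only a print added): the flattened activation right before self.fc tells you which in_features the layer needs:

    def forward(self, x):
        x = self.vit(x)
        x = self.cnn(x)
        x = x.view(x.size(0), -1)
        print(x.shape)   # e.g. torch.Size([2, N]); N must equal self.fc.in_features (150528 here)
        x = self.fc(x)
        return x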

here is the error:
RuntimeError: Given groups=1, weight of size [64, 768, 3, 3], expected input[32, 1000, 1, 1] to have 768 channels, but got 1000 channels instead

and here is the updated code:

# Set the number of desired output channels
desired_output_channels = 12

# Transformer encoder output size (adjust if needed)
transformer_output_size = 768

# Define additional convolutional and fully connected layers
additional_conv = nn.Conv2d(in_channels=768, out_channels=64, kernel_size=3, stride=1, padding=1)
additional_fc = nn.Linear(in_features=64, out_features=desired_output_channels)

# Concatenate ViT, Conv, and FC layers
class ExtendedViTModel(nn.Module):
    def __init__(self, vit_model, additional_conv, additional_fc):
        super(ExtendedViTModel, self).__init__()
        self.vit_model = vit_model
        self.additional_conv = additional_conv
        self.additional_fc = additional_fc

    def forward(self, x):
        # ViT forward pass
        vit_output = self.vit_model(x)

        # Additional convolutional layer
        vit_output = vit_output.unsqueeze(-1).unsqueeze(-1)  # Add height and width dimensions
        model.additional_conv.out_channels = 1000

        conv_output = self.additional_conv(vit_output)

        conv_output = nn.functional.adaptive_avg_pool2d(conv_output, (1, 1))

        # Flatten
        conv_output = conv_output.view(conv_output.size(0), -1)

        # Additional fully connected layer
        fc_output = self.additional_fc(conv_output)

        return fc_output

# Create the extended model
pretrained_vit = ExtendedViTModel(pretrained_vit, additional_conv, additional_fc).to(device)

# Uncomment for model output
print(pretrained_vit)

for i in range(0, 2034, 16):
    for j in range(0, 2003, 16):
        for k in range(0, 8182, 16):
            sliced_data = crystal[i:i+16, j:j+16, k:k+16, :]
            tensor_data = torch.as_tensor(sliced_data)            # convert to tensor
            tensor_data = torch.permute(tensor_data, (3, 0, 1, 2))
            Y = model(tensor_data.float())                        # pass through model

Error:
RuntimeError Traceback (most recent call last)
/scratch/14940477/ipykernel_1076915/2486182164.py in
5 tensor_data=torch.as_tensor(sliced_data) #convert to tensor
6 tensor_data=torch.permute(tensor_data, (3, 0, 1, 2))
----> 7 Y=model(tensor_data.float()) # pass through model
8 #vac conc
9 #x=i*vxl_size_x

/projects/academic/kreyes3/AtomicDL/YZTDL/model_training/my_env./lib/python3.9/site-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
→ 1518 return self._call_impl(*args, **kwargs)
1519
1520 def _call_impl(self, *args, **kwargs):

/projects/academic/kreyes3/AtomicDL/YZTDL/model_training/my_env./lib/python3.9/site-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1527 return forward_call(*args, **kwargs)
1528
1529 try:

/projects/academic/kreyes3/AtomicDL/YZTDL/model_training/my_env./lib/python3.9/site-packages/vacancy_reconstruction/__init__.py in forward(self, x)
127
128 elif self.reconstruction_mode == ReconstructionMode.COUNTS:
→ 129 return self.head(z[-1]) # Dense network applied to latent features
130
131 else:

/projects/academic/kreyes3/AtomicDL/YZTDL/model_training/my_env./lib/python3.9/site-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
→ 1518 return self._call_impl(*args, **kwargs)
1519
1520 def _call_impl(self, *args, **kwargs):

/projects/academic/kreyes3/AtomicDL/YZTDL/model_training/my_env./lib/python3.9/site-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1527 return forward_call(*args, **kwargs)
1528
1529 try:

/projects/academic/kreyes3/AtomicDL/YZTDL/model_training/my_env./lib/python3.9/site-packages/torch/nn/modules/container.py in forward(self, input)
213 def forward(self, input):
214 for module in self:
→ 215 input = module(input)
216 return input
217

/projects/academic/kreyes3/AtomicDL/YZTDL/model_training/my_env./lib/python3.9/site-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
→ 1518 return self._call_impl(*args, **kwargs)
1519
1520 def _call_impl(self, *args, **kwargs):

/projects/academic/kreyes3/AtomicDL/YZTDL/model_training/my_env./lib/python3.9/site-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1527 return forward_call(*args, **kwargs)
1528
1529 try:

/projects/academic/kreyes3/AtomicDL/YZTDL/model_training/my_env./lib/python3.9/site-packages/torch/nn/modules/linear.py in forward(self, input)
112
113 def forward(self, input: Tensor) → Tensor:
→ 114 return F.linear(input, self.weight, self.bias)
115
116 def extra_repr(self) → str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (6x512 and 3072x4)

Your code is not properly formatted and hard to read. However, it seems the shape mismatch is raised in:

/projects/academic/kreyes3/AtomicDL/YZTDL/model_training/my_env./lib/python3.9/site-packages/vacancy_reconstruction/__init__.py in forward(self, x)
127
128 elif self.reconstruction_mode == ReconstructionMode.COUNTS:
→ 129 return self.head(z[-1]) # Dense network applied to latent features

so you might want to check which input self.head expects and why z does not fit.
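A quick way to compare the two is a hypothetical debugging sketch like the one below; I don’t know the internals of the vacancy_reconstruction package, so the head attribute and layer layout are assumptions based on the traceback:

    import torch.nn as nn

    # assumption: `model` is the vacancy_reconstruction module from the traceback, exposing `head`
    def show_input(module, inputs):
        print("head input:", inputs[0].shape)           # the error suggests [6, 512]

    model.head.register_forward_pre_hook(show_input)

    # assumption: the head starts with a Linear layer whose in_features must match
    first_linear = next(m for m in model.head.modules() if isinstance(m, nn.Linear))
    print("head expects:", first_linear.in_features)    # the error suggests 3072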

RuntimeError: mat1 and mat2 shapes cannot be multiplied (8x645 and 215x215)

Here I am providing the chunk of my code where it gives an error regarding matrix multiplication. I am new here and have been trying but am unable to figure out a solution. If possible, please help me out.

BP  Weight_model start
Weight_classifier(
  (weight_layer): MaskedLinear(in_features=215, out_features=215, bias=True)
  (outlayer): Linear(in_features=215, out_features=215, bias=True)
)
batch_size_8,learning_rate_0.01,epoch_times_1
Traceback (most recent call last):
  File "/home/bvs/neelam/input_ourmodel/input/4valid.py", line 1012, in <module>
    validation(Terms[0], 5)
  File "/home/bvs/neelam/input_ourmodel/input/4valid.py", line 994, in validation
    each_fold_scores = Main(train_set, test_set, func=func)
  File "/home/bvs/neelam/input_ourmodel/input/4valid.py", line 884, in Main
    out = weight_model(weight_features)
  File "/home/bvs/miniconda3/envs/crisprcasfinder/envs/envML/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/bvs/neelam/input_ourmodel/input/4valid.py", line 314, in forward
    weight_out = self.weight_layer(weight_features)
  File "/home/bvs/miniconda3/envs/crisprcasfinder/envs/envML/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/bvs/neelam/input_ourmodel/input/4valid.py", line 331, in forward
    return F.linear(input, masked_weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (8x645 and 215x215)

Here I am providing a link to all the input files I am using in this model (4valid.py): (GitHub - neelam19051/DLmodel)

Thank you so much!

Double post from here.

Can you please help me solve this? I have been trying for the last three days.

import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
class MFB(nn.Module):
    def __init__(self,img_feat_size, ques_feat_size,is_first, MFB_K, MFB_O, DROPOUT_R):
        super(MFB, self).__init__()
        #self.__C = __C
        self.MFB_K = MFB_K
        self.MFB_O = MFB_O
        self.DROPOUT_R = DROPOUT_R


        self.is_first = is_first
        self.proj_i = nn.Linear(img_feat_size, MFB_K * MFB_O)
        self.proj_q = nn.Linear(ques_feat_size, MFB_K * MFB_O)


        self.dropout = nn.Dropout(DROPOUT_R)
        self.pool = nn.AvgPool1d(MFB_K, stride = MFB_K)

    def forward(self, img_feat, ques_feat,exp_in=1):
        batch_size = img_feat.shape[0]
        img_feat = self.proj_i(img_feat)                # (N, C, K*O)
        ques_feat = self.proj_q(ques_feat)              # (N, 1, K*O)



        exp_out = img_feat * ques_feat           # (N, C, K*O)
        exp_out = self.dropout(exp_out) if self.is_first else self.dropout(exp_out * exp_in)     # (N, C, K*O)
        z = self.pool(exp_out) * self.MFB_K         # (N, C, O)
        z = torch.sqrt(F.relu(z)) - torch.sqrt(F.relu(-z))
        z = F.normalize(z.view(batch_size, -1))         # (N, C*O)
        z = z.view(batch_size, -1, self.MFB_O)      # (N, C, O)
        return z
class Classifier(pl.LightningModule):

  def __init__(self):
    super().__init__()
    self.MFB = MFB(512,768,True,256,64,0.1)
    self.loss_fn_emotion=torch.nn.KLDivLoss(reduction='batchmean',log_target=True)
    self.fin_y_shape = torch.nn.Linear(768,512)
    self.fin_old = torch.nn.Linear(64,2)
    self.fin = torch.nn.Linear(16 * 768, 64)
    self.fin_persuasive = torch.nn.Linear(16 * 768, 64)
    self.fin_e1 = torch.nn.Linear(16 * 768, 64)
    self.fin_e2 = torch.nn.Linear(16 * 768, 64)
    self.fin_e3 = torch.nn.Linear(16 * 768, 64)
    self.fin_e4 = torch.nn.Linear(16 * 768, 64)
    self.fin_e5 = torch.nn.Linear(16 * 768, 64)
    self.fin_e6 = torch.nn.Linear(16 * 768, 64)
    self.fin_e7 = torch.nn.Linear(16 * 768, 64)
    self.fin_e8 = torch.nn.Linear(16 * 768, 64)
    self.fin_e9 = torch.nn.Linear(16 * 768, 64)


    self.validation_step_outputs = []
    self.test_step_outputs = []

  def forward(self, x,y,rag):
      x_,y_,rag_ = x,y,rag
      print("x.shape", x.shape)
      z = self.MFB(torch.unsqueeze(y, axis=1), torch.unsqueeze(x, axis=1))
      #cross_attention= (rag and  x/y)
      z_new = torch.squeeze(z, dim=1)
      c = self.fin_old(z_new)
      c_e1 = self.fin_e1(torch.squeeze(z,dim=1))
      c_v = self.fin_persuasive(torch.squeeze(z,dim=1))
      c_e2 = self.fin_e2(torch.squeeze(z,dim=1))
      c_e3 = self.fin_e3(torch.squeeze(z,dim=1))
      c_e4 = self.fin_e4(torch.squeeze(z,dim=1))
      c_e5 = self.fin_e5(torch.squeeze(z,dim=1))
      c_e6 = self.fin_e6(torch.squeeze(z,dim=1))
      c_e7 = self.fin_e7(torch.squeeze(z,dim=1))
      c_e8 = self.fin_e8(torch.squeeze(z,dim=1))
      c_e9 = self.fin_e9(torch.squeeze(z,dim=1))

      c = torch.log_softmax(c, dim=1)
      c_v = torch.log_softmax(c_v, dim=1)
      c_e1 = torch.log_softmax(c_e1, dim=1)
      c_e2 = torch.log_softmax(c_e2, dim=1)
      c_e3 = torch.log_softmax(c_e3, dim=1)
      c_e4 = torch.log_softmax(c_e4, dim=1)

      c_e5 = torch.log_softmax(c_e5, dim=1)
      c_e6 = torch.log_softmax(c_e6, dim=1)
      c_e7 = torch.log_softmax(c_e7, dim=1)

      c_e8 = torch.log_softmax(c_e8, dim=1)
      c_e9 = torch.log_softmax(c_e9, dim=1)
      return z,c,c_v,c_e1,c_e2,c_e3,c_e4,c_e5,c_e6,c_e7,c_e8,c_e9

  def cross_entropy_loss(self, logits, labels):
    return F.nll_loss(logits, labels)

  def training_step(self, train_batch, batch_idx):
      lab,txt,rag,img,name,perin,per,iro,alli,ana,inv,meta,puns,sat,hyp= train_batch
      lab = train_batch[lab]
      #print(lab)
      name= train_batch[name]
      txt = train_batch[txt]
      rag = train_batch[rag]
      img = train_batch[img]
      perin = train_batch[perin]
      per = train_batch[per]
      iro= train_batch[iro]
      alli = train_batch[alli]
      ana = train_batch[ana]
      inv = train_batch[inv]
      meta = train_batch[meta]
      puns = train_batch[puns]
      sat = train_batch[sat]
      hyp = train_batch[hyp]

      gt_emotion = torch.cat((torch.unsqueeze(per,1),torch.unsqueeze(iro,1),torch.unsqueeze(alli,1),\
                              torch.unsqueeze(ana,1),torch.unsqueeze(inv,1),torch.unsqueeze(meta,1),\
                              torch.unsqueeze(puns,1),torch.unsqueeze(sat,1),torch.unsqueeze(hyp,1)),1)

      z,logit_offen,logit_perin,a,b,c,d,e,f,g,h,i= self.forward(txt,img,rag) # logit_target is logits of target


      # logit_offen= self.forward(txt,img,rag)
      loss23=self.cross_entropy_loss(logit_perin,perin)
      loss1 = self.cross_entropy_loss(logit_offen, lab)
      loss2 = self.cross_entropy_loss(a,per)
      loss3 = self.cross_entropy_loss(b,iro)
      loss4 = self.cross_entropy_loss(c, alli)
      loss5 = self.cross_entropy_loss(d,ana)
      loss6 = self.cross_entropy_loss(e,inv)
      loss7 = self.cross_entropy_loss(f,meta)
      loss8 = self.cross_entropy_loss(g,puns)
      loss9 = self.cross_entropy_loss(h,sat)
      loss10 = self.cross_entropy_loss(i,hyp)

      # loss = loss1 + loss2 + loss3 + loss4 + loss5 + loss6 + loss7 + loss8 +loss9 + loss10

      loss_emo_mult = F.binary_cross_entropy_with_logits(gt_emotion.float())


      loss=loss1+loss_emo_mult
      self.log('train_loss', loss)

      return loss


  def validation_step(self, val_batch, batch_idx):
      lab,txt,rag,img,name,perin,per,iro,alli,ana,inv,meta,puns,sat,hyp = val_batch
      lab = val_batch[lab]
      #print(lab)
      txt = val_batch[txt]
      rag = val_batch[rag]
      img = val_batch[img]
      name = val_batch[name]
      perin = val_batch[perin]
      per = val_batch[per]
      iro = val_batch[iro]
      alli = val_batch[alli]
      ana = val_batch[ana]
      inv = val_batch[inv]
      meta = val_batch[meta]
      puns = val_batch[puns]
      sat = val_batch[sat]
      hyp = val_batch[hyp]
      
      gt_emotion = torch.cat((torch.unsqueeze(per,1),torch.unsqueeze(iro,1),torch.unsqueeze(alli,1),\
                              torch.unsqueeze(ana,1),torch.unsqueeze(inv,1),torch.unsqueeze(meta,1),\
                              torch.unsqueeze(puns,1),torch.unsqueeze(sat,1),torch.unsqueeze(hyp,1)),1)

      logits,logit_perin,a,b,c,d,e,f,g,h,i = self.forward(txt,img,rag)


      # logits= self.forward(txt,img,rag)
      logits=logits.float()
      tmp = np.argmax(logits.detach().cpu().numpy(),axis=-1)
      loss = self.cross_entropy_loss(logits, lab)
      lab = lab.detach().cpu().numpy()
      self.log('val_acc', accuracy_score(lab,tmp))
      self.log('val_roc_auc',roc_auc_score(lab,tmp))
      self.log('val_loss', loss)
      tqdm_dict = {'val_acc': accuracy_score(lab,tmp)}
      self.validation_step_outputs.append({'progress_bar': tqdm_dict,'val_f1 offensive': f1_score(lab,tmp,average='macro')})

      return {
                'progress_bar': tqdm_dict,
      'val_f1 offensive': f1_score(lab,tmp,average='macro'),
      'val_f1 personification': f1_score(per,tmp,average='macro'),
      'val_f1 irony': f1_score(iro,tmp,average='macro'),
      'val_f1 alliteration': f1_score(alli,tmp,average='macro'),
      'val_f1 analogies': f1_score(ana,tmp,average='macro'),
      'val_f1 invective': f1_score(inv,tmp,average='macro'),
      'val_f1 metaphor': f1_score(meta,tmp,average='macro'),
      'val_f1 punsandplay': f1_score(puns,tmp,average='macro'),
      'val_f1 satire': f1_score(sat,tmp,average='macro'),
      'val_f1 hyperboles': f1_score(hyp,tmp,average='macro')

      }

  def on_validation_epoch_end(self):
    outs = []
    outs14=[]
    for out in self.validation_step_outputs:
       outs.append(out['progress_bar']['val_acc'])
       outs14.append(out['val_f1 offensive'])
    self.log('val_acc_all_offn', sum(outs)/len(outs))
    self.log('val_f1 offensive', sum(outs14)/len(outs14))
    print(f'***val_acc_all_offn at epoch end {sum(outs)/len(outs)}****')
    print(f'***val_f1 offensive at epoch end {sum(outs14)/len(outs14)}****')
    self.validation_step_outputs.clear()

  def test_step(self, batch, batch_idx):
    #   lab,txt,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12, e13,e14, e15,e16,img,name= batch
      lab,txt,rag,img,name,per,iro,alli,ana,inv,meta,puns,sat,hyp= batch

      lab = batch[lab]
      #print(lab)
      rag = batch[rag]

      txt = batch[txt]
      img = batch[img]
      name = batch[name]
      per = batch[per]
      iro = batch[iro]
      alli = batch[alli]
      ana = batch[ana]
      inv = batch[inv]
      meta = batch[meta]
      puns = batch[puns]
      sat = batch[sat]
      hyp = batch[hyp]

      gt_emotion = torch.cat((torch.unsqueeze(e1,1),torch.unsqueeze(e2,1),torch.unsqueeze(e3,1),torch.unsqueeze(e4,1),torch.unsqueeze(e5,1),torch.unsqueeze(e6,1),\
                              torch.unsqueeze(e7,1),torch.unsqueeze(e8,1),torch.unsqueeze(e9,1)),1)

      _,logits,a,b,c,d,e,f,g,h,i= self.forward(txt,img,rag)
      logits = logits.float()
      tmp = np.argmax(logits.detach().cpu().numpy(force=True),axis=-1)
      loss = self.cross_entropy_loss(logits, lab)
      lab = lab.detach().cpu().numpy()
      self.log('test_acc', accuracy_score(lab,tmp))
      self.log('test_roc_auc',roc_auc_score(lab,tmp))
      self.log('test_loss', loss)
      tqdm_dict = {'test_acc': accuracy_score(lab,tmp)}
      self.test_step_outputs.append({'progress_bar': tqdm_dict,'test_acc': accuracy_score(lab,tmp), 'test_f1_score': f1_score(lab,tmp,average='macro')})
      return {
                'progress_bar': tqdm_dict,
                'test_acc': accuracy_score(lab,tmp),
                'test_f1_score': f1_score(lab,tmp,average='macro'),
                'test_f1_score': f1_score(lab,tmp,average='macro'),
                'test_f1_score': f1_score(lab,tmp,average='macro'),
                'test_f1_score': f1_score(lab,tmp,average='macro'),
                'test_f1_score': f1_score(lab,tmp,average='macro'),
                'test_f1_score': f1_score(lab,tmp,average='macro'),
                'test_f1_score': f1_score(lab,tmp,average='macro')
      }
  def on_test_epoch_end(self):
      # OPTIONAL
      outs = []
      outs1,outs2,outs3,outs4,outs5,outs6,outs7,outs8,outs9,outs10,outs11,outs12,outs13,outs14 = \
      [],[],[],[],[],[],[],[],[],[],[],[],[],[]
      for out in self.test_step_outputs:
        outs.append(out['test_acc'])
        outs2.append(out['test_f1_score'])
      self.log('test_acc', sum(outs)/len(outs))
      self.log('test_f1_score', sum(outs2)/len(outs2))
      self.test_step_outputs.clear()

  def configure_optimizers(self):
    # optimizer = torch.optim.Adam(self.parameters(), lr=3e-2)
    optimizer = torch.optim.Adam(self.parameters(), lr=1e-5)

    return optimizer


"""
Main Model:
Initialize
Forward Pass
Training Step
Validation Step
Testing Step

Pp
"""

class HmDataModule(pl.LightningDataModule):

  def setup(self, stage):
    self.hm_train = t_p
    self.hm_val = v_p
    # self.hm_test = test
    self.hm_test = te_p

  def train_dataloader(self):
    return DataLoader(self.hm_train, batch_size=20, drop_last=True)

  def val_dataloader(self):
    return DataLoader(self.hm_val, batch_size=20, drop_last=True)

  def test_dataloader(self):
    return DataLoader(self.hm_test, batch_size=20, drop_last=True)

data_module = HmDataModule()
checkpoint_callback = ModelCheckpoint(
     monitor='val_acc_all_offn',
     dirpath='mrinal/',
     filename='epoch{epoch:02d}-val_f1_all_offn{val_acc_all_offn:.2f}',
     auto_insert_metric_name=False,
     save_top_k=1,
    mode="max",
 )
all_callbacks = []
all_callbacks.append(checkpoint_callback)
# train
from pytorch_lightning import seed_everything
seed_everything(42, workers=True)
hm_model = Classifier()
gpus=1
#if torch.cuda.is_available():gpus=0
trainer = pl.Trainer(deterministic=True,max_epochs=10,precision=16,callbacks=all_callbacks)
trainer.fit(hm_model, data_module)

30.0 M Total params
120.198 Total estimated model params size (MB)
Sanity Checking DataLoader 0: 0%
0/2 [00:00<?, ?it/s]
x.shape torch.Size([20, 768])

RuntimeError Traceback (most recent call last)
in <cell line: 285>()
283 #if torch.cuda.is_available():gpus=0
284 trainer = pl.Trainer(deterministic=True,max_epochs=10,precision=16,callbacks=all_callbacks)
→ 285 trainer.fit(hm_model, data_module)

14 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py in forward(self, input)
112
113 def forward(self, input: Tensor) → Tensor:
→ 114 return F.linear(input, self.weight, self.bias)
115
116 def extra_repr(self) → str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (20x64 and 12288x64)

I am not getting what I am doing wrong.

It seems z should have a shape of [batch_size, -1, 64] here:

z = self.MFB(torch.unsqueeze(y, axis=1), torch.unsqueeze(x, axis=1))

based on:

z = z.view(batch_size, -1, self.MFB_O)      # (N, C, O)
return z

and will thus use 64 input features (dim1 is squeezed later).
The self.fin_eX layers, however, expect an activation input with 16 * 768 input features and will thus fail.
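A minimal way to make the shapes line up (assuming each of these heads should classify the 64-dimensional MFB output, as self.fin_old already does) would be to give them 64 input features as well:

    # sketch: match the heads to the 64 features left after squeezing z
    # (assuming binary heads, like self.fin_old)
    self.fin_persuasive = torch.nn.Linear(64, 2)
    self.fin_e1 = torch.nn.Linear(64, 2)
    self.fin_e2 = torch.nn.Linear(64, 2)
    # ... and likewise for fin_e3 through fin_e9

Alternatively, keep the 16 * 768 layers and feed them an activation that actually has that many features; which option is right depends on what each head is supposed to see.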

Sir, how should I solve this? Can you please tell me a little bit more?

import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from pathlib import Path
from typing import Union

class SineReLU(nn.Module):
def forward(self, x):
    return torch.sin(F.relu(x))

class HighwayNetwork(nn.Module):
def __init__(self, size):
    super().__init__()
    self.W1 = nn.Linear(size, size)
    self.W2 = nn.Linear(size, size)
    self.W1.bias.data.fill_(0.)

def forward(self, x):
    x1 = self.W1(x)
    x2 = self.W2(x)
    g = torch.sigmoid(x2)
    # Replace F.relu(x1) with SineReLU in the HighwayNetwork class
    y = g * SineReLU()(x1) + (1. - g) * x
    return y

class Encoder(nn.Module):
def __init__(self, embed_dims, num_chars, encoder_dims, K, num_highways, dropout):
    super().__init__()
    prenet_dims = (encoder_dims, encoder_dims)
    cbhg_channels = encoder_dims
    self.embedding = nn.Embedding(num_chars, embed_dims)
    self.pre_net = PreNet(embed_dims, fc1_dims=prenet_dims[0], fc2_dims=prenet_dims[1],
                          dropout=dropout)
    self.cbhg = CBHG(K=K, in_channels=cbhg_channels, channels=cbhg_channels,
                     proj_channels=[cbhg_channels, cbhg_channels],
                     num_highways=num_highways)

def forward(self, x, speaker_embedding=None):
    x = self.embedding(x)
    x = self.pre_net(x)
    x.transpose_(1, 2)
    x = self.cbhg(x)
    if speaker_embedding is not None:
        x = self.add_speaker_embedding(x, speaker_embedding)
    return x

def add_speaker_embedding(self, x, speaker_embedding):
    # SV2TTS
    # The input x is the encoder output and is a 3D tensor with size (batch_size, num_chars, tts_embed_dims)
    # When training, speaker_embedding is also a 2D tensor with size (batch_size, speaker_embedding_size)
    #     (for inference, speaker_embedding is a 1D tensor with size (speaker_embedding_size))
    # This concats the speaker embedding for each char in the encoder output

    # Save the dimensions as human-readable names
    batch_size = x.size()[0]
    num_chars = x.size()[1]

    if speaker_embedding.dim() == 1:
        idx = 0
    else:
        idx = 1

    # Start by making a copy of each speaker embedding to match the input text length
    # The output of this has size (batch_size, num_chars * tts_embed_dims)
    speaker_embedding_size = speaker_embedding.size()[idx]
    e = speaker_embedding.repeat_interleave(num_chars, dim=idx)

    # Reshape it and transpose
    e = e.reshape(batch_size, speaker_embedding_size, num_chars)
    e = e.transpose(1, 2)

    # Concatenate the tiled speaker embedding with the encoder output
    x = torch.cat((x, e), 2)
    return x

class BatchNormConv(nn.Module):
def __init__(self, in_channels, out_channels, kernel, relu=True):
    super().__init__()
    self.conv = nn.Conv1d(in_channels, out_channels, kernel, stride=1, padding=kernel // 2, bias=False)
    self.bnorm = nn.BatchNorm1d(out_channels)
    self.relu = SineReLU()

def forward(self, x):
    x = self.conv(x)
    x = F.relu(x) if self.relu is True else x
    return self.bnorm(x)

class CBHG(nn.Module):
def __init__(self, K, in_channels, channels, proj_channels, num_highways):
    super().__init__()

    # List of all rnns to call `flatten_parameters()` on
    self._to_flatten = []

    self.bank_kernels = [i for i in range(1, K + 1)]
    self.conv1d_bank = nn.ModuleList()
    for k in self.bank_kernels:
        conv = BatchNormConv(in_channels, channels, k)
        self.conv1d_bank.append(conv)

    self.maxpool = nn.MaxPool1d(kernel_size=2, stride=1, padding=1)

    self.conv_project1 = BatchNormConv(len(self.bank_kernels) * channels, proj_channels[0], 3)
    self.conv_project2 = BatchNormConv(proj_channels[0], proj_channels[1], 3, relu=False)

    # Fix the highway input if necessary
    if proj_channels[-1] != channels:
        self.highway_mismatch = True
        self.pre_highway = nn.Linear(proj_channels[-1], channels, bias=False)
    else:
        self.highway_mismatch = False

    self.highways = nn.ModuleList()
    for i in range(num_highways):
        hn = HighwayNetwork(channels)
        self.highways.append(hn)

    self.rnn = nn.GRU(channels, channels // 2, batch_first=True, bidirectional=True)
    self._to_flatten.append(self.rnn)

    # Avoid fragmentation of RNN parameters and associated warning
    self._flatten_parameters()

def forward(self, x):
    # Although we `_flatten_parameters()` on init, when using DataParallel
    # the model gets replicated, making it no longer guaranteed that the
    # weights are contiguous in GPU memory. Hence, we must call it again
    self._flatten_parameters()

    # Save these for later
    residual = x
    seq_len = x.size(-1)
    conv_bank = []

    # Convolution Bank
    for conv in self.conv1d_bank:
        c = conv(x) # Convolution
        conv_bank.append(c[:, :, :seq_len])

    # Stack along the channel axis
    conv_bank = torch.cat(conv_bank, dim=1)

    # dump the last padding to fit residual
    x = self.maxpool(conv_bank)[:, :, :seq_len]

    # Conv1d projections
    x = self.conv_project1(x)
    x = self.conv_project2(x)

    # Residual Connect
    x = x + residual

    # Through the highways
    x = x.transpose(1, 2)
    if self.highway_mismatch is True:
        x = self.pre_highway(x)
    for h in self.highways: x = h(x)

    # And then the RNN
    x, _ = self.rnn(x)
    return x

def _flatten_parameters(self):
    """Calls `flatten_parameters` on all the rnns used by the WaveRNN. Used
    to improve efficiency and avoid PyTorch yelling at us."""
    [m.flatten_parameters() for m in self._to_flatten]

class PreNet(nn.Module):
def __init__(self, in_dims, fc1_dims=256, fc2_dims=128, dropout=0.5):
    super().__init__()
    self.fc1 = nn.Linear(in_dims, fc1_dims)
    self.fc2 = nn.Linear(fc1_dims, fc2_dims)
    self.p = dropout

def forward(self, x):
    x = self.fc1(x)
    x = F.relu(x)
    x = F.dropout(x, self.p, training=True)
    x = self.fc2(x)
    x = F.relu(x)
    x = F.dropout(x, self.p, training=True)
    return x

class Attention(nn.Module):
def __init__(self, attn_dims):
    super().__init__()
    self.W = nn.Linear(attn_dims, attn_dims, bias=False)
    self.v = nn.Linear(attn_dims, 1, bias=False)

def forward(self, encoder_seq_proj, query, t):

    # print(encoder_seq_proj.shape)
    # Transform the query vector
    query_proj = self.W(query).unsqueeze(1)

    # Compute the scores
    u = self.v(torch.tanh(encoder_seq_proj + query_proj))
    scores = F.softmax(u, dim=1)

    return scores.transpose(1, 2)

class LSA(nn.Module):
def __init__(self, attn_dim, kernel_size=31, filters=32):
    super().__init__()
    self.conv = nn.Conv1d(1, filters, padding=(kernel_size - 1) // 2, kernel_size=kernel_size, bias=True)
    self.L = nn.Linear(filters, attn_dim, bias=False)
    self.W = nn.Linear(attn_dim, attn_dim, bias=True)  # Include the attention bias in this term
    self.v = nn.Linear(attn_dim, 1, bias=False)
    self.cumulative = None
    self.attention = None

def init_attention(self, encoder_seq_proj):
    device = next(self.parameters()).device  # use same device as parameters
    b, t, c = encoder_seq_proj.size()
    self.cumulative = torch.zeros(b, t, device=device)
    self.attention = torch.zeros(b, t, device=device)

def forward(self, encoder_seq_proj, query, t, chars):

    if t == 0: self.init_attention(encoder_seq_proj)

    processed_query = self.W(query).unsqueeze(1)

    location = self.cumulative.unsqueeze(1)
    processed_loc = self.L(self.conv(location).transpose(1, 2))

    u = self.v(torch.tanh(processed_query + encoder_seq_proj + processed_loc))
    u = u.squeeze(-1)

    # Mask zero padding chars
    u = u * (chars != 0).float()

    # Smooth Attention
    # scores = torch.sigmoid(u) / torch.sigmoid(u).sum(dim=1, keepdim=True)
    scores = F.softmax(u, dim=1)
    self.attention = scores
    self.cumulative = self.cumulative + self.attention

    return scores.unsqueeze(-1).transpose(1, 2)

class Decoder(nn.Module):
# Class variable because its value doesn't change between classes
# yet ought to be scoped by class because it's a property of a Decoder
max_r = 20
def __init__(self, n_mels, encoder_dims, decoder_dims, lstm_dims,
             dropout, speaker_embedding_size):
    super().__init__()
    self.register_buffer("r", torch.tensor(1, dtype=torch.int))
    self.n_mels = n_mels
    prenet_dims = (decoder_dims * 2, decoder_dims * 2)
    self.prenet = PreNet(n_mels, fc1_dims=prenet_dims[0], fc2_dims=prenet_dims[1],
                         dropout=dropout)
    self.attn_net = LSA(decoder_dims)
    self.attn_rnn = nn.GRUCell(encoder_dims + prenet_dims[1] + speaker_embedding_size, decoder_dims)
    self.rnn_input = nn.Linear(encoder_dims + decoder_dims + speaker_embedding_size, lstm_dims)
    self.res_rnn1 = nn.LSTMCell(lstm_dims, lstm_dims)
    self.res_rnn2 = nn.LSTMCell(lstm_dims, lstm_dims)
    self.mel_proj = nn.Linear(lstm_dims, n_mels * self.max_r, bias=False)
    self.stop_proj = nn.Linear(encoder_dims + speaker_embedding_size + lstm_dims, 1)

def zoneout(self, prev, current, p=0.1):
    device = next(self.parameters()).device  # Use same device as parameters
    mask = torch.zeros(prev.size(), device=device).bernoulli_(p)
    return prev * mask + current * (1 - mask)

def forward(self, encoder_seq, encoder_seq_proj, prenet_in,
            hidden_states, cell_states, context_vec, t, chars):

    # Need this for reshaping mels
    batch_size = encoder_seq.size(0)

    # Unpack the hidden and cell states
    attn_hidden, rnn1_hidden, rnn2_hidden = hidden_states
    rnn1_cell, rnn2_cell = cell_states

    # PreNet for the Attention RNN
    prenet_out = self.prenet(prenet_in)

    # Compute the Attention RNN hidden state
    attn_rnn_in = torch.cat([context_vec, prenet_out], dim=-1)
    attn_hidden = self.attn_rnn(attn_rnn_in.squeeze(1), attn_hidden)

    # Compute the attention scores
    scores = self.attn_net(encoder_seq_proj, attn_hidden, t, chars)

    # Dot product to create the context vector
    context_vec = scores @ encoder_seq
    context_vec = context_vec.squeeze(1)

    # Concat Attention RNN output w. Context Vector & project
    x = torch.cat([context_vec, attn_hidden], dim=1)
    x = self.rnn_input(x)

    # Compute first Residual RNN
    rnn1_hidden_next, rnn1_cell = self.res_rnn1(x, (rnn1_hidden, rnn1_cell))
    if self.training:
        rnn1_hidden = self.zoneout(rnn1_hidden, rnn1_hidden_next)
    else:
        rnn1_hidden = rnn1_hidden_next
    x = x + rnn1_hidden

    # Compute second Residual RNN
    rnn2_hidden_next, rnn2_cell = self.res_rnn2(x, (rnn2_hidden, rnn2_cell))
    if self.training:
        rnn2_hidden = self.zoneout(rnn2_hidden, rnn2_hidden_next)
    else:
        rnn2_hidden = rnn2_hidden_next
    x = x + rnn2_hidden

    # Project Mels
    mels = self.mel_proj(x)
    mels = mels.view(batch_size, self.n_mels, self.max_r)[:, :, :self.r]
    hidden_states = (attn_hidden, rnn1_hidden, rnn2_hidden)
    cell_states = (rnn1_cell, rnn2_cell)

    # Stop token prediction
    s = torch.cat((x, context_vec), dim=1)
    s = self.stop_proj(s)
    stop_tokens = torch.sigmoid(s)

    return mels, scores, hidden_states, cell_states, context_vec, stop_tokens

class Tacotron(nn.Module):
def __init__(self, embed_dims, num_chars, encoder_dims, decoder_dims, n_mels,
             fft_bins, postnet_dims, encoder_K, lstm_dims, postnet_K, num_highways,
             dropout, stop_threshold, speaker_embedding_size):
    super().__init__()
    self.n_mels = n_mels
    self.lstm_dims = lstm_dims
    self.encoder_dims = encoder_dims
    self.decoder_dims = decoder_dims
    self.speaker_embedding_size = speaker_embedding_size
    self.encoder = Encoder(embed_dims, num_chars, encoder_dims,
                           encoder_K, num_highways, dropout)
    self.encoder_proj = nn.Linear(encoder_dims + speaker_embedding_size, decoder_dims, bias=False)
    self.decoder = Decoder(n_mels, encoder_dims, decoder_dims, lstm_dims,
                           dropout, speaker_embedding_size)
    self.postnet = CBHG(postnet_K, n_mels, postnet_dims,
                        [postnet_dims, fft_bins], num_highways)
    self.post_proj = nn.Linear(postnet_dims, fft_bins, bias=False)

    self.init_model()
    self.num_params()

    self.register_buffer("step", torch.zeros(1, dtype=torch.long))
    self.register_buffer("stop_threshold", torch.tensor(stop_threshold, dtype=torch.float32))

@property
def r(self):
    return self.decoder.r.item()

@r.setter
def r(self, value):
    self.decoder.r = self.decoder.r.new_tensor(value, requires_grad=False)

def forward(self, x, m, speaker_embedding):
    device = next(self.parameters()).device  # use same device as parameters

    self.step += 1
    batch_size, _, steps  = m.size()

    # Initialise all hidden states and pack into tuple
    attn_hidden = torch.zeros(batch_size, self.decoder_dims, device=device)
    rnn1_hidden = torch.zeros(batch_size, self.lstm_dims, device=device)
    rnn2_hidden = torch.zeros(batch_size, self.lstm_dims, device=device)
    hidden_states = (attn_hidden, rnn1_hidden, rnn2_hidden)

    # Initialise all lstm cell states and pack into tuple
    rnn1_cell = torch.zeros(batch_size, self.lstm_dims, device=device)
    rnn2_cell = torch.zeros(batch_size, self.lstm_dims, device=device)
    cell_states = (rnn1_cell, rnn2_cell)

    # <GO> Frame for start of decoder loop
    go_frame = torch.zeros(batch_size, self.n_mels, device=device)

    # Need an initial context vector
    context_vec = torch.zeros(batch_size, self.encoder_dims + self.speaker_embedding_size, device=device)

    # SV2TTS: Run the encoder with the speaker embedding
    # The projection avoids unnecessary matmuls in the decoder loop
    encoder_seq = self.encoder(x, speaker_embedding)
    print("Encoder sequence shape:", encoder_seq.shape)
    encoder_seq_proj = self.encoder_proj(encoder_seq)
    print(encoder_seq.shape, self.encoder_proj.weight.shape, self.encoder_proj.bias.shape)

    # Need a couple of lists for outputs
    mel_outputs, attn_scores, stop_outputs = [], [], []

    # Run the decoder loop
    for t in range(0, steps, self.r):
        prenet_in = m[:, :, t - 1] if t > 0 else go_frame
        mel_frames, scores, hidden_states, cell_states, context_vec, stop_tokens = \
            self.decoder(encoder_seq, encoder_seq_proj, prenet_in,
                         hidden_states, cell_states, context_vec, t, x)
        mel_outputs.append(mel_frames)
        attn_scores.append(scores)
        stop_outputs.extend([stop_tokens] * self.r)

    # Concat the mel outputs into sequence
    mel_outputs = torch.cat(mel_outputs, dim=2)

    # Post-Process for Linear Spectrograms
    postnet_out = self.postnet(mel_outputs)
    linear = self.post_proj(postnet_out)
    linear = linear.transpose(1, 2)

    # For easy visualisation
    attn_scores = torch.cat(attn_scores, 1)
    # attn_scores = attn_scores.cpu().data.numpy()
    stop_outputs = torch.cat(stop_outputs, 1)

    return mel_outputs, linear, attn_scores, stop_outputs

def generate(self, x, speaker_embedding=None, steps=2000):
    self.eval()
    device = next(self.parameters()).device  # use same device as parameters

    batch_size, _  = x.size()

    # Need to initialise all hidden states and pack into tuple for tidyness
    attn_hidden = torch.zeros(batch_size, self.decoder_dims, device=device)
    rnn1_hidden = torch.zeros(batch_size, self.lstm_dims, device=device)
    rnn2_hidden = torch.zeros(batch_size, self.lstm_dims, device=device)
    hidden_states = (attn_hidden, rnn1_hidden, rnn2_hidden)

    # Need to initialise all lstm cell states and pack into tuple for tidyness
    rnn1_cell = torch.zeros(batch_size, self.lstm_dims, device=device)
    rnn2_cell = torch.zeros(batch_size, self.lstm_dims, device=device)
    cell_states = (rnn1_cell, rnn2_cell)

    # Need a <GO> Frame for start of decoder loop
    go_frame = torch.zeros(batch_size, self.n_mels, device=device)

    # Need an initial context vector
    context_vec = torch.zeros(batch_size, self.encoder_dims + self.speaker_embedding_size, device=device)

    # SV2TTS: Run the encoder with the speaker embedding
    # The projection avoids unnecessary matmuls in the decoder loop
    encoder_seq = self.encoder(x, speaker_embedding)
    encoder_seq_proj = self.encoder_proj(encoder_seq)

    # Need a couple of lists for outputs
    mel_outputs, attn_scores, stop_outputs = [], [], []

    # Run the decoder loop
    for t in range(0, steps, self.r):
        prenet_in = mel_outputs[-1][:, :, -1] if t > 0 else go_frame
        mel_frames, scores, hidden_states, cell_states, context_vec, stop_tokens = \
        self.decoder(encoder_seq, encoder_seq_proj, prenet_in,
                     hidden_states, cell_states, context_vec, t, x)
        mel_outputs.append(mel_frames)
        attn_scores.append(scores)
        stop_outputs.extend([stop_tokens] * self.r)
        # Stop the loop when all stop tokens in batch exceed threshold
        if (stop_tokens > 0.5).all() and t > 10: break

    # Concat the mel outputs into sequence
    mel_outputs = torch.cat(mel_outputs, dim=2)

    # Post-Process for Linear Spectrograms
    postnet_out = self.postnet(mel_outputs)
    linear = self.post_proj(postnet_out)


    linear = linear.transpose(1, 2)

    # For easy visualisation
    attn_scores = torch.cat(attn_scores, 1)
    stop_outputs = torch.cat(stop_outputs, 1)

    self.train()

    return mel_outputs, linear, attn_scores

def init_model(self):
    for p in self.parameters():
        if p.dim() > 1: nn.init.xavier_uniform_(p)

def get_step(self):
    return self.step.data.item()

def reset_step(self):
    # assignment to parameters or buffers is overloaded, updates internal dict entry
    self.step = self.step.data.new_tensor(1)

def log(self, path, msg):
    with open(path, "a") as f:
        print(msg, file=f)

def load(self, path, optimizer=None):
    # Use device of model params as location for loaded state
    device = next(self.parameters()).device
    checkpoint = torch.load(str(path), map_location=device)
    self.load_state_dict(checkpoint["model_state"])

    if "optimizer_state" in checkpoint and optimizer is not None:
        optimizer.load_state_dict(checkpoint["optimizer_state"])

def save(self, path, optimizer=None):
    if optimizer is not None:
        torch.save({
            "model_state": self.state_dict(),
            "optimizer_state": optimizer.state_dict(),
        }, str(path))
    else:
        torch.save({
            "model_state": self.state_dict(),
        }, str(path))


def num_params(self, print_out=True):
    parameters = filter(lambda p: p.requires_grad, self.parameters())
    parameters = sum([np.prod(p.size()) for p in parameters]) / 1_000_000
    if print_out:
        print("Trainable Parameters: %.3fM" % parameters)
    return parameters

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1788x320 and 512x128)

Can someone guide me with this error?

RuntimeError: mat1 and mat2 shapes cannot be multiplied (10x2048 and 64x2)

I am trying to concatenate the x, y, and rag features, but it is giving me an error. I have used a simple concat in the forward function, and it still fails. Can anyone help me solve the problem?

How do I fix an error when concatenating x, y, and rag in the forward function using torch.cat, ensuring matching dimensions and device types?
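Here is a minimal sketch of what I am trying to do (stand-in shapes consistent with the 10x2048 in the error message; all three tensors must share the same batch size, remaining dimensions, and device):

    import torch

    x   = torch.randn(10, 768)          # stand-in for the text features
    y   = torch.randn(10, 512)          # stand-in for the image features
    rag = torch.randn(10, 768)          # stand-in for the rag features

    # if the tensors were created on different devices, move them first, e.g. y = y.to(x.device)
    z = torch.cat((x, y, rag), dim=1)   # -> [10, 2048]
    print(z.shape)

    # any head applied to z must then expect 2048 input features, e.g.
    head = torch.nn.Linear(2048, 2)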

import torch
import torch.nn as nn
import torch.nn.functional as F
class MFB(nn.Module):
    def __init__(self,img_feat_size, ques_feat_size, is_first, MFB_K, MFB_O, DROPOUT_R):
        super(MFB, self).__init__()
        #self.__C = __C
        self.MFB_K = MFB_K
        self.MFB_O = MFB_O
        self.DROPOUT_R = DROPOUT_R

        self.is_first = is_first
        self.proj_i = nn.Linear(img_feat_size, MFB_K * MFB_O)
        self.proj_q = nn.Linear(ques_feat_size, MFB_K * MFB_O)

        self.dropout = nn.Dropout(DROPOUT_R)
        self.pool = nn.AvgPool1d(MFB_K, stride = MFB_K)

    def forward(self, img_feat, ques_feat, exp_in=1):
        batch_size = img_feat.shape[0]
        img_feat = self.proj_i(img_feat)                # (N, C, K*O)
        ques_feat = self.proj_q(ques_feat)              # (N, 1, K*O)

        exp_out = img_feat * ques_feat             # (N, C, K*O)
        exp_out = self.dropout(exp_out) if self.is_first else self.dropout(exp_out * exp_in)     # (N, C, K*O)
        z = self.pool(exp_out) * self.MFB_K         # (N, C, O)
        z = torch.sqrt(F.relu(z)) - torch.sqrt(F.relu(-z))
        z = F.normalize(z.view(batch_size, -1))         # (N, C*O)
        z = z.view(batch_size, -1, self.MFB_O)      # (N, C, O)
        return z


#MFB -> Multimodal Factorized Bilinear Pooling
#used to model complex interactions between features like image and text
#MFB_K -> Number Of factors, MFB_O -> Output size,
#Init initializes linear projection layers for image and question features , dropout layer and average pooling layer

#Forward:

#exp_in = input expansion factor (default - 1)
#Linear projection of image and question features to factorized bilinear form
#Element-wise multiplication of image and question features
#APply Dropout
#Average pooling along the factorized dimension (MFB_K) to reduce the size of the output tensor
#Element-wise operations to compute the final output (z) using square root and normalization using Relu.
#The final output represents the fused representation of image and question features.
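As a quick usage sketch of the MFB block above (dummy tensors, just to make the expected shapes concrete):

    import torch

    mfb = MFB(img_feat_size=512, ques_feat_size=768, is_first=True,
              MFB_K=256, MFB_O=64, DROPOUT_R=0.1)

    img_feat  = torch.randn(4, 1, 512)   # (N, C, img_feat_size)
    ques_feat = torch.randn(4, 1, 768)   # (N, 1, ques_feat_size)

    z = mfb(img_feat, ques_feat)
    print(z.shape)                       # torch.Size([4, 1, 64]) -> (N, C, MFB_O)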

data = data[~data['Name'].isin(outliers)]
len(sample_dataset_new)

torch.manual_seed(123)
t_p,v_p = torch.utils.data.random_split(sample_dataset_new,[450,50])

# torch.manual_seed(123)
t_p,te_p = torch.utils.data.random_split(t_p,[340,110])

t_p[1]["processed_img"].shape
t_p[1]['processed_txt'].shape
t_p[1]['processed_rag'].shape

(768,)
    class Classifier(pl.LightningModule):

    def __init__(self):
      super().__init__()
      self.MFB = MFB(512,768,True,256,64,0.1)
      self.fin_y_shape = torch.nn.Linear(768,512)
      self.fin_old = torch.nn.Linear(64,2)
      self.fin = torch.nn.Linear(16 * 768, 64)
      self.fin_inten = torch.nn.Linear(2048,6)
      self.fin_e1 = torch.nn.Linear(64,2)
      self.fin_e2 = torch.nn.Linear(64,2)
      self.fin_e3 = torch.nn.Linear(64,2)
      self.fin_e4 = torch.nn.Linear(64,2)
      self.fin_e5 = torch.nn.Linear(64,2)
      self.fin_e6 = torch.nn.Linear(64,2)
      self.fin_e7 = torch.nn.Linear(64,2)
      self.fin_e8 = torch.nn.Linear(64,2)
      self.fin_e9 = torch.nn.Linear(64,2)
      # self.reduce_x = torch.nn.Linear(768, 512)
      # self.reduce_rag = torch.nn.Linear(768, 512)



      self.validation_step_outputs = []
      self.test_step_outputs = []


    def forward(self, x,y,rag):
        x_,y_,rag_ = x,y,rag
        print("x.shape", x.shape)
        print("y.shape",y.shape)
        print("rag.shape",rag.shape)

        # x = self.reduce_x(x)
        # rag = self.reduce_rag(rag)

        # print("x.shape", x.shape)
        # print("y.shape",y.shape)
        # print("rag.shape",rag.shape)
        # z = self.MFB(torch.unsqueeze(y, axis=1), torch.unsqueeze(rag, axis=1))
        # z_rag = self.MFB(torch.unsqueeze(y, axis=1),torch.unsqueeze(rag, axis=1))
        # z_con = torch.cat((z, z_rag), dim=1)


        # Concatenate x with y and then with rag


        z= torch.cat((torch.cat((x, y), dim=1), rag), dim=1)


        # Pass concatenated x with y and x with rag through your network
        z_new = torch.squeeze(z,dim=1)
        print("z_new shape",z_new)


        c_inten = self.fin_inten(z_new)
        c_e1 = self.fin_e1(z_new)
        c_e2 = self.fin_e2(z_new)
        c_e3 = self.fin_e3(z_new)
        c_e4 = self.fin_e4(z_new)
        c_e5 = self.fin_e5(z_new)
        c_e6 = self.fin_e6(z_new)
        c_e7 = self.fin_e7(z_new)
        c_e8 = self.fin_e8(z_new)
        c_e9 = self.fin_e9(z_new)
        c = self.fin_old(z_new)

        # print("z.shape",z.shape)
        # print("z_new shape",z_new.shape)
        # print("intensity error:", c_inten.shape)
        # print("output:", c.shape)
        # print("c_e1:", c_e1.shape)
        # print("c_e2:", c_e2.shape)
        # print("c_e3:", c_e3.shape)
        # print("c_e4:", c_e4.shape)
        # print("c_e5:", c_e5.shape)
        # print("c_e6:", c_e6.shape)
        # print("c_e7:", c_e7.shape)
        # print("c_e8:", c_e8.shape)
        # print("c_e9:", c_e9.shape)
        # print("logits.shape",logits.shape)


        output = torch.log_softmax(c, dim=1)
        c_inten = torch.log_softmax(c_inten, dim=1)
        c_e1 = torch.log_softmax(c_e1, dim=1)
        c_e2 = torch.log_softmax(c_e2, dim=1)
        c_e3 = torch.log_softmax(c_e3, dim=1)
        c_e4 = torch.log_softmax(c_e4, dim=1)
        c_e5 = torch.log_softmax(c_e5, dim=1)
        c_e6 = torch.log_softmax(c_e6, dim=1)
        c_e7 = torch.log_softmax(c_e7, dim=1)
        c_e8 = torch.log_softmax(c_e8, dim=1)
        c_e9 = torch.log_softmax(c_e9, dim=1)

        return output,c_inten,c_e1,c_e2,c_e3,c_e4,c_e5,c_e6,c_e7,c_e8,c_e9


    def cross_entropy_loss(self, logits, labels):
      print("logits.shape",logits.shape)
      return F.nll_loss(logits, labels)

    def training_step(self, train_batch, batch_idx):
        #lab,txt,rag,img,name,per,iro,alli,ana,inv,meta,puns,sat,hyp= train_batch
        lab,txt,rag,img,name,intensity,e1,e2,e3,e4,e5,e6,e7,e8,e9= train_batch
        #logit_offen,a,b,c,d,e,f,g,h,i,logit_inten_target= self.forward(txt,img,rag)

        lab = train_batch[lab].unsqueeze(1)
        #print(lab)
        txt = train_batch[txt]
        rag = train_batch[rag]
        img = train_batch[img]
        name= train_batch[name]
        intensity = train_batch[intensity].unsqueeze(1)
        e1 = train_batch[e1].unsqueeze(1)
        e2 = train_batch[e2].unsqueeze(1)
        e3 = train_batch[e3].unsqueeze(1)
        e4 = train_batch[e4].unsqueeze(1)
        e5 = train_batch[e5].unsqueeze(1)
        e6 = train_batch[e6].unsqueeze(1)
        e7 = train_batch[e7].unsqueeze(1)
        e8 = train_batch[e8].unsqueeze(1)
        e9 = train_batch[e9].unsqueeze(1)

        lab = F.one_hot(lab, num_classes=2)
        intensity = torch.abs(intensity)
        intensity = F.one_hot(intensity, num_classes=6)  # Assuming you have 6 classes
        e1 = F.one_hot(e1,num_classes = 2)
        e2 = F.one_hot(e2,num_classes = 2)
        e3 = F.one_hot(e3,num_classes = 2)
        e4 = F.one_hot(e4,num_classes = 2)
        e5 = F.one_hot(e5,num_classes = 2)
        e6 = F.one_hot(e6,num_classes = 2)
        e7 = F.one_hot(e7,num_classes = 2)
        e8 = F.one_hot(e8,num_classes = 2)
        e9 = F.one_hot(e9,num_classes = 2)

        lab = lab.squeeze(dim=1)
        intensity = intensity.squeeze(dim=1)
        e1 = e1.squeeze(dim=1)
        e2 = e2.squeeze(dim=1)
        e3 = e3.squeeze(dim=1)
        e4 = e4.squeeze(dim=1)
        e5 = e5.squeeze(dim=1)
        e6 = e6.squeeze(dim=1)
        e7 = e7.squeeze(dim=1)
        e8 = e8.squeeze(dim=1)
        e9 = e9.squeeze(dim=1)

        logit_offen,logit_inten_target,a,b,c,d,e,f,g,h,i= self.forward(txt,img,rag)

        loss1 = self.cross_entropy_loss(logit_offen, lab)
        loss17 = self.cross_entropy_loss(logit_inten_target, intensity)
        loss4 = self.cross_entropy_loss(a, e1)
        loss5 = self.cross_entropy_loss(b, e2)
        loss6 = self.cross_entropy_loss(c, e3)
        loss7 = self.cross_entropy_loss(d, e4)
        loss8 = self.cross_entropy_loss(e, e5)
        loss9 = self.cross_entropy_loss(f, e6)
        loss10 = self.cross_entropy_loss(g, e7)
        loss11 = self.cross_entropy_loss(h, e8)
        loss12 = self.cross_entropy_loss(i, e9)

        loss = loss1 + loss4 + loss5 + loss6 + loss7 + loss8 +loss9 + loss10 +loss11 +loss12 + loss17

        self.log('train_loss', loss)
        return loss


    def validation_step(self, val_batch, batch_idx):
        #lab,txt,rag,img,name,per,iro,alli,ana,inv,meta,puns,sat,hyp = val_batch
        lab,txt,rag,img,name,intensity,e1,e2,e3,e4,e5,e6,e7,e8,e9= val_batch
        lab = val_batch[lab].unsqueeze(1)
        #print(lab)
        txt = val_batch[txt]
        rag = val_batch[rag]
        img = val_batch[img]
        name = val_batch[name]
        intensity = val_batch[intensity].unsqueeze(1)
        e1 = val_batch[e1].unsqueeze(1)
        e2 = val_batch[e2].unsqueeze(1)
        e3 = val_batch[e3].unsqueeze(1)
        e4 = val_batch[e4].unsqueeze(1)
        e5 = val_batch[e5].unsqueeze(1)
        e6 = val_batch[e6].unsqueeze(1)
        e7 = val_batch[e7].unsqueeze(1)
        e8 = val_batch[e8].unsqueeze(1)
        e9 = val_batch[e9].unsqueeze(1)

        lab = F.one_hot(lab, num_classes=2)

        intensity = torch.abs(intensity)
        intensity = F.one_hot(intensity, num_classes=6)
        e1 = F.one_hot(e1,num_classes = 2)
        e2 = F.one_hot(e2,num_classes = 2)
        e3 = F.one_hot(e3,num_classes = 2)
        e4 = F.one_hot(e4,num_classes = 2)
        e5 = F.one_hot(e5,num_classes = 2)
        e6 = F.one_hot(e6,num_classes = 2)
        e7 = F.one_hot(e7,num_classes = 2)
        e8 = F.one_hot(e8,num_classes = 2)
        e9 = F.one_hot(e9,num_classes = 2)
        lab = lab.squeeze(dim=1)


        intensity = intensity.squeeze(dim = 1)
        e1 = e1.squeeze(dim=1)
        e2 = e2.squeeze(dim=1)
        e3 = e3.squeeze(dim=1)
        e4 = e4.squeeze(dim=1)
        e5 = e5.squeeze(dim=1)
        e6 = e6.squeeze(dim=1)
        e7 = e7.squeeze(dim=1)
        e8 = e8.squeeze(dim=1)
        e9 = e9.squeeze(dim=1)

        logits,inten,a,b,c,d,e,f,g,h,i = self.forward(txt,img,rag)

        logits=logits.float()

        tmp = np.argmax(logits.detach().cpu().numpy(),axis=1)
        loss = self.cross_entropy_loss(logits, lab)
        lab = lab.detach().cpu().numpy()
        self.log('val_acc', accuracy_score(lab,tmp))
        self.log('val_roc_auc',roc_auc_score(lab,tmp))
        self.log('val_loss', loss)
        tqdm_dict = {'val_acc': accuracy_score(lab,tmp)}
        self.validation_step_outputs.append({'progress_bar': tqdm_dict,'val_f1 offensive': f1_score(lab,tmp,average='macro')})

        return {
                  'progress_bar': tqdm_dict,
        'val_f1 offensive': f1_score(lab,tmp,average='macro')
        }

    def on_validation_epoch_end(self):
      outs = []
      outs14=[]
      for out in self.validation_step_outputs:
        outs.append(out['progress_bar']['val_acc'])
        outs14.append(out['val_f1 offensive'])
      self.log('val_acc_all_offn', sum(outs)/len(outs))
      self.log('val_f1 offensive', sum(outs14)/len(outs14))
      print(f'***val_acc_all_offn at epoch end {sum(outs)/len(outs)}****')
      print(f'***val_f1 offensive at epoch end {sum(outs14)/len(outs14)}****')
      self.validation_step_outputs.clear()

    def test_step(self, batch, batch_idx):
        lab,txt,rag,img,name,intensity,e1,e2,e3,e4,e5,e6,e7,e8,e9= batch
        lab = batch[lab].unsqueeze(1)
        #print(lab)
        txt = batch[txt]
        rag = batch[rag]
        img = batch[img]
        name = batch[name]
        intensity = batch[intensity].unsqueeze(1)
        e1 = batch[e1].unsqueeze(1)
        e2 = batch[e2].unsqueeze(1)
        e3 = batch[e3].unsqueeze(1)
        e4 = batch[e4].unsqueeze(1)
        e5 = batch[e5].unsqueeze(1)
        e6 = batch[e6].unsqueeze(1)
        e7 = batch[e7].unsqueeze(1)
        e8 = batch[e8].unsqueeze(1)
        e9 = batch[e9].unsqueeze(1)
        lab = F.one_hot(lab, num_classes=2)
        intensity = F.one_hot(intensity, num_classes=6)
        e1 = F.one_hot(e1,num_classes = 2)
        e2 = F.one_hot(e2,num_classes = 2)
        e3 = F.one_hot(e3,num_classes = 2)
        e4 = F.one_hot(e4,num_classes = 2)
        e5 = F.one_hot(e5,num_classes = 2)
        e6 = F.one_hot(e6,num_classes = 2)
        e7 = F.one_hot(e7,num_classes = 2)
        e8 = F.one_hot(e8,num_classes = 2)
        e9 = F.one_hot(e9,num_classes = 2)
        lab = lab.squeeze(dim=1)
        intensity = intensity.squeeze(dim=1)
        e1 = e1.squeeze(dim=1)
        e2 = e2.squeeze(dim=1)
        e3 = e3.squeeze(dim=1)
        e4 = e4.squeeze(dim=1)
        e5 = e5.squeeze(dim=1)
        e6 = e6.squeeze(dim=1)
        e7 = e7.squeeze(dim=1)
        e8 = e8.squeeze(dim=1)
        e9 = e9.squeeze(dim=1)

        logits,inten,a,b,c,d,e,f,g,h,i = self.forward(txt,img,rag)

        logits = logits.float()
        tmp = np.argmax(logits.detach().cpu().numpy(), axis=1)
        loss = self.cross_entropy_loss(logits, lab)
        lab = lab.detach().cpu().numpy()
        self.log('test_acc', accuracy_score(lab,tmp))
        self.log('test_roc_auc',roc_auc_score(lab,tmp))
        self.log('test_loss', loss)
        tqdm_dict = {'test_acc': accuracy_score(lab,tmp)}
        self.test_step_outputs.append({'progress_bar': tqdm_dict,'test_acc': accuracy_score(lab,tmp), 'test_f1_score': f1_score(lab,tmp,average='macro')})
        return {
                  'progress_bar': tqdm_dict,
                  'test_acc': accuracy_score(lab,tmp),
                  'test_f1_score': f1_score(lab,tmp,average='macro')
        }
    def on_test_epoch_end(self):
        # OPTIONAL
        outs = []
        outs2 = []
        for out in self.test_step_outputs:
          outs.append(out['test_acc'])
          outs2.append(out['test_f1_score'])
        self.log('test_acc', sum(outs)/len(outs))
        self.log('test_f1_score', sum(outs2)/len(outs2))
        self.test_step_outputs.clear()

    def configure_optimizers(self):
      # optimizer = torch.optim.Adam(self.parameters(), lr=3e-2)
      optimizer = torch.optim.Adam(self.parameters(), lr=1e-5)

      return optimizer


  """
  Main Model:
  Initialize
  Forward Pass
  Training Step
  Validation Step
  Testing Step

  Pp
  """

  class HmDataModule(pl.LightningDataModule):

    def setup(self, stage):
      self.hm_train = t_p
      self.hm_val = v_p
      # self.hm_test = test
      self.hm_test = te_p

    def train_dataloader(self):
      return DataLoader(self.hm_train, batch_size=10, drop_last=True)

    def val_dataloader(self):
      return DataLoader(self.hm_val, batch_size=10, drop_last=True)

    def test_dataloader(self):
      return DataLoader(self.hm_test, batch_size=10, drop_last=True)

  data_module = HmDataModule()
  checkpoint_callback = ModelCheckpoint(
      monitor='val_acc_all_offn',
      dirpath='mrinal/',
      filename='epoch{epoch:02d}-val_acc_all_offn{val_acc_all_offn:.2f}',
      auto_insert_metric_name=False,
      save_top_k=1,
      mode="max",
  )
  all_callbacks = []
  all_callbacks.append(checkpoint_callback)
  # train
  from pytorch_lightning import seed_everything
  seed_everything(42, workers=True)
  hm_model = Classifier()
  gpus=1
  #if torch.cuda.is_available():gpus=0
  trainer = pl.Trainer(deterministic=True,max_epochs=10,precision=16,callbacks=all_callbacks)
  trainer.fit(hm_model, data_module)
INFO:lightning_fabric.utilities.seed:Seed set to 42
/usr/local/lib/python3.10/dist-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/accelerator_connector.py:556: You passed `Trainer(accelerator='cpu', precision='16-mixed')` but AMP with fp16 is not supported on CPU. Using `precision='bf16-mixed'` instead.
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
WARNING:pytorch_lightning.loggers.tensorboard:Missing logger folder: /content/LLaVA/lightning_logs
INFO:pytorch_lightning.callbacks.model_summary:
   | Name        | Type   | Params
----------------------------------------
0  | MFB         | MFB    | 21.0 M
1  | fin_y_shape | Linear | 393 K 
2  | fin_old     | Linear | 130   
3  | fin         | Linear | 786 K 
4  | fin_inten   | Linear | 12.3 K
5  | fin_e1      | Linear | 130   
6  | fin_e2      | Linear | 130   
7  | fin_e3      | Linear | 130   
8  | fin_e4      | Linear | 130   
9  | fin_e5      | Linear | 130   
10 | fin_e6      | Linear | 130   
11 | fin_e7      | Linear | 130   
12 | fin_e8      | Linear | 130   
13 | fin_e9      | Linear | 130   
----------------------------------------
22.2 M    Trainable params
0         Non-trainable params
22.2 M    Total params
88.792    Total estimated model params size (MB)
Sanity Checking DataLoader 0:   0%
 0/2 [00:00<?, ?it/s]
x.shape torch.Size([10, 768])
y.shape torch.Size([10, 512])
rag.shape torch.Size([10, 768])
z_new shape tensor([[ 0.0144, -0.1677,  0.1100,  ..., -0.1818,  0.4250, -0.2985],
        [-0.2105, -0.1002, -0.0113,  ..., -0.0639,  0.3789, -0.0553],
        [-0.1221, -0.1026, -0.3277,  ..., -0.3724,  0.1562,  0.0286],
        ...,
        [-0.0950,  0.3957,  0.3603,  ..., -0.2121,  0.6465, -0.1983],
        [ 0.0080,  0.2380, -0.0409,  ..., -0.2565,  0.0946, -0.1098],
        [ 0.1351, -0.3463,  0.3371,  ..., -0.2283,  0.4667,  0.0087]])
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-29-279b4c8e1163> in <cell line: 369>()
    367 #if torch.cuda.is_available():gpus=0
    368 trainer = pl.Trainer(deterministic=True,max_epochs=10,precision=16,callbacks=all_callbacks)
--> 369 trainer.fit(hm_model, data_module)

14 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    112 
    113     def forward(self, input: Tensor) -> Tensor:
--> 114         return F.linear(input, self.weight, self.bias)
    115 
    116     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (10x2048 and 64x2)
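
The numbers in the traceback do most of the diagnosis: F.linear multiplies the input by the transposed weight, so (10x2048 and 64x2) means a batch of 10 vectors with 2048 features is reaching a layer whose weight is stored as (2, 64), i.e. one declared as nn.Linear(64, 2). That is consistent with the 130-parameter fin_* heads in the model summary above (64·2 + 2 = 130). The head's in_features has to equal the last dimension of the tensor it receives. A minimal sketch of the mismatch and of the fix, assuming the fused feature really is 2048-dimensional as the error suggests (the variable names here are illustrative, not taken from the model):

import torch
import torch.nn as nn

feats = torch.randn(10, 2048)      # batch of 10 fused feature vectors

bad_head = nn.Linear(64, 2)        # weight is stored as (2, 64) -> expects 64-dim input
# bad_head(feats)                  # RuntimeError: ... (10x2048 and 64x2)

good_head = nn.Linear(2048, 2)     # in_features matches feats.shape[-1]
print(good_head(feats).shape)      # torch.Size([10, 2])

If you are not sure which tensor reaches which head, print x.shape right before the head (the z_new debug print above shows the values rather than the shape) and set in_features from that, or use nn.LazyLinear(2) to let PyTorch infer in_features on the first forward pass.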

Double post from here.

Hi, I’m getting the following error. I believe it’s caused by mismatched input and output shapes, like the others in this thread, but I’m not sure what to change them to. My x_train_tensor.shape is (1117157, 8), each batch from train_dataloader has shape (32, 8), and net.fc1.weight.shape is (50, 1117157).

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x8 and 1117157x50)

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class NetFc(nn.Module):
    def __init__(self):
      super(NetFc, self).__init__()
      self.fc1 = nn.Linear(x_train_tensor.shape[0], 50)
      self.fc2 = nn.Linear(50, 50)
      self.fc3 = nn.Linear(50, 50)
      self.fc4 = nn.Linear(50, 50)
      self.fc5 = nn.Linear(50, 50)
      self.fc6 = nn.Linear(50, 50)
      self.fc7 = nn.Linear(50, 50)
      self.fc8 = nn.Linear(50, y_train_tensor.shape[0])
      
    def forward(self, x):
      x = torch.flatten(x, 1)
      x = F.relu(self.fc1(x))
      x = F.relu(self.fc2(x))
      x = F.relu(self.fc3(x))
      x = F.relu(self.fc4(x))
      x = F.relu(self.fc5(x))
      x = F.relu(self.fc6(x))
      x = F.relu(self.fc7(x))
      x = self.fc8(x)
      #Softmax layer should always be last
      output = F.log_softmax(x, dim=1)

      # Return the output of the network
      return output
def trainMyModel(net,lr,train_dataloader,n_epochs):

  # define loss and optimizer
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(net.parameters(), lr=lr)

  for epoch in range(n_epochs):                  # loop over number of epochs
    running_loss = 0.0
    for i, (data, target) in enumerate(train_dataloader):  # enumerate so i exists for the logging below
      optimizer.zero_grad()                      # zero gradient buffers
      outputs = net(data.float())                      # forward prop
      loss = criterion(outputs, target)          # calculate loss
      loss.backward()                            # backward prop
      optimizer.step()                           # optimize

      # print statistics
      running_loss += loss.item()
      if i % 100 == 99:              # print every 100 mini-batches
        print(f'[{epoch + 1}, {i +1:5d}] loss: {running_loss / 100:.3f}')
        running_loss = 0.0

  print('Finished Training')
  return net
# Train your model.
net = NetFc()
lr = 1e-2
n_epochs = 2

trainedNet = trainMyModel(net, lr, train_dataloader, n_epochs)
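
The same rule applies here: nn.Linear's in_features must be the number of features per sample, not the number of samples. The batches are (32, 8), so fc1 should be nn.Linear(8, 50), and the last layer should map to the number of classes rather than y_train_tensor.shape[0]. A minimal sketch under those assumptions (num_classes is a placeholder, since the post does not say how many classes there are):

import torch
import torch.nn as nn
import torch.nn.functional as F

class NetFcFixed(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(8, 50)            # 8 = features per sample, not len(x_train_tensor)
        self.fc2 = nn.Linear(50, 50)
        self.fc8 = nn.Linear(50, num_classes)  # out_features = number of classes

    def forward(self, x):
        x = torch.flatten(x, 1)                # (32, 8) is already flat, so this is a no-op here
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc8(x)                     # return raw logits for nn.CrossEntropyLoss

Note that nn.CrossEntropyLoss already applies log-softmax internally, so returning raw logits avoids applying it twice; keep F.log_softmax at the end only if you switch the criterion to nn.NLLLoss.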