RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x13056 and 153600x2048)

Your model works fine for me:

model = MyFirstNet()
x = torch.randn(1, 3, 224, 224)
out = model(x)
print(out.shape)
# torch.Size([1, 2])

What? No way!

Why do I get a shape error then? Where is the error?

And the error appears if you call the trainer function:

trainer(model=model.classify, dataloader=dataset_loader,
        num_epochs=10, optimizer=optimizer_mynet, train=True)

Your trainer function needs a dataloader argument, which I cannot define here since it requires your real data.
Also, your code is not properly formatted. Please post a minimal and executable code snippet reproducing the error in case you get stuck.

Of course you're right, I'm a beginner, I'm sorry.
Here is my code:


import os
import torch
from torchvision import datasets, transforms

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
composed_transforms = transforms.Compose([transforms.Resize(224),
                                          transforms.CenterCrop(224),
                                          transforms.ToTensor(),
                                          normalize])

#data_dir = 'data_exam'
data_dir = '../datasets/dogscats/'
dataset = {x: datasets.ImageFolder(os.path.join(data_dir, x), transform=composed_transforms)
           for x in ['train', 'valid']}

My data loader, which I had forgotten:

def to_shuffle(x):
    # shuffle only the training split
    return x == 'train'

dataset_loader = {x: torch.utils.data.DataLoader(dataset[x], batch_size=4,
                                                 shuffle=to_shuffle(x), num_workers=6)
                  for x in ['train', 'valid']}

My network (VGG16):

import torch.nn as nn
import torch.utils.model_zoo as model_zoo

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.categories = 1000  # number of ImageNet categories
        self.block1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels = 64, kernel_size = 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 3, padding = 1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
        )
        self.block2 = nn.Sequential(
            nn.Conv2d(in_channels = 64, out_channels = 128, kernel_size = 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.Conv2d(in_channels = 128, out_channels = 128, kernel_size = 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
        )
        self.block3 = nn.Sequential(
            nn.Conv2d(in_channels = 128, out_channels = 256, kernel_size = 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
        )
        self.block4 = nn.Sequential(
            nn.Conv2d(in_channels = 256, out_channels = 512, kernel_size = 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
        )
        self.block5 = nn.Sequential(
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
        )
        self.classify = nn.Sequential(
            nn.Linear(in_features = 512*7*7, out_features = 4096),
            nn.ReLU(inplace = True),
            nn.Dropout(),
            nn.Linear(in_features = 4096, out_features = 4096),
            nn.ReLU(inplace = True),
            nn.Dropout(),
            nn.Linear(in_features = 4096, out_features = self.categories),
        )       

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = x.view(x.size(0), -1)
        x = self.classify(x)
        return x

mynet = Network()
params_pre = model_zoo.load_url('https://download.pytorch.org/models/vgg16-397923af.pth')
give_params_to_model(mynet, params_pre)  # custom helper that copies the pretrained weights

for param in mynet.parameters():
    param.requires_grad = False

mynet.classify._modules['6'] = nn.Linear(in_features=4096, out_features=2)
learning_rate = 1e-4
optimizer_mynet = torch.optim.SGD(mynet.classify[6].parameters(), lr=learning_rate)

print(mynet)

Network(
  (block1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block4): Sequential(
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block5): Sequential(
    (0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classify): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=224, out_features=2, bias=True)
  )
)

My trainer function:

def trainer_exam(model, dataloader, num_epochs, optimizer=None, train=True):
    """
    Implements a generic training function that can be used both to train a
    network and to evaluate its performance on a validation set.

    Arguments:
        model: network to train (or evaluate); can be the full network or a single layer of it
        dataloader: iterator over the dataset
        num_epochs (int, optional): number of training epochs (default: 1)
        optimizer: optimizer used for training, usually a torch.optim object
        train (bool, optional): run the function in train or eval mode (default: True)
    """
    sizes = {'train': 23000, 'valid': 2000}
    if train:
        model.train()
        phase = 'train'
    else:
        model.eval()
        phase = 'valid'
    print("The phase is {}".format(phase))
    for epoch in range(num_epochs):
        running_loss = 0.0
        running_corrects = 0
        batch_counter = 1
        for inputs, classes in dataloader[phase]:
            #inputs , classes = Variable(torch.from_numpy(inputs)),Variable(torch.from_numpy(classes))

            outputs = model(inputs)
            loss_function = nn.CrossEntropyLoss()
            loss = loss_function(outputs, classes)
            if train:
                if optimizer is None:
                    raise ValueError('Pass optimizer for train mode')
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            _, preds = torch.max(outputs.data, 1)

            running_loss += loss.data.item()
            running_corrects += torch.sum(preds == classes.data)

            print("Cumulated loss of the first {} batches: {}".format(batch_counter, running_loss))
            batch_counter += 1
            # depends on the Python/PyTorch version
        epoch_loss = float(running_loss)
        epoch_acc = float(running_corrects) / sizes[phase]
        print('Loss: {:}, Acc: {:}'.format(epoch_loss, epoch_acc))

trainer_exam(model=mynet.classify, dataloader=dataset_loader,
             num_epochs=10, optimizer=optimizer_mynet, train=True)

Result:

The phase is train


RuntimeError                              Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_15252\302059662.py in <module>
      2
      3
----> 4 trainer_exam(model = mynet.classify, dataloader=dataset_loader,
      5               num_epochs = 10, optimizer=optimizer_mynet,train=True)
      6

~\AppData\Local\Temp\ipykernel_15252\3161195043.py in trainer_exam(model, dataloader, num_epochs, optimizer, train)
     32             #inputs , classes = Variable(torch.from_numpy(inputs)),Variable(torch.from_numpy(classes))
     33
---> 34             outputs = model(inputs)
     35             loss_function = nn.CrossEntropyLoss()
     36             loss = loss_function(outputs,classes)

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *args, **kwargs)
   1499                 or _global_backward_pre_hooks or _global_backward_hooks
   1500                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501             return forward_call(*args, **kwargs)
   1502         # Do not call functions when jit is used
   1503         full_backward_hooks, non_full_backward_hooks = [], []

~\Anaconda3\lib\site-packages\torch\nn\modules\container.py in forward(self, input)
    215     def forward(self, input):
    216         for module in self:
--> 217             input = module(input)
    218         return input
    219

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *args, **kwargs)
   1499                 or _global_backward_pre_hooks or _global_backward_hooks
   1500                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501             return forward_call(*args, **kwargs)
   1502         # Do not call functions when jit is used
   1503         full_backward_hooks, non_full_backward_hooks = [], []

~\Anaconda3\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
    112
    113     def forward(self, input: Tensor) -> Tensor:
--> 114         return F.linear(input, self.weight, self.bias)
    115
    116     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2688x224 and 25088x4096)

Your code is unfortunately still not executable, and the model still works for me using the previously posted input shape:

mynet = Network()
mynet.classify._modules['6'] = nn.Linear(in_features=4096, out_features=2)

x = torch.randn(2, 3, 224, 224)
out = mynet(x)
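
The shapes in the error message also show what goes wrong: you pass the raw image batch directly to mynet.classify, so the conv blocks and the flattening in Network.forward never run. F.linear then treats the last image dimension as the feature dimension, turning a [4, 3, 224, 224] batch into a (4*3*224) x 224 = 2688x224 matrix, which cannot be multiplied with the 25088x4096 weight of the first linear layer. A minimal sketch of the fix, assuming the rest of your setup stays unchanged:

# Pass the full model so the conv blocks and the x.view(x.size(0), -1)
# flattening run before the classifier:
trainer_exam(model=mynet, dataloader=dataset_loader,
             num_epochs=10, optimizer=optimizer_mynet, train=True)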

Hi,

I’ve gotten the same error. Can you help me debug it?

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Define the CNN architecture
class TimeSeriesCNN(nn.Module):
    def __init__(self, input_channels, output_size):
        super(TimeSeriesCNN, self).__init__()
        self.conv_layer = nn.Sequential(
            nn.Conv1d(in_channels=input_channels, out_channels=16, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
            nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(28 * final_sequence_length, 64),
            nn.ReLU(),
            nn.Linear(64, output_size)
        )

    def forward(self, x):
        x = self.conv_layer(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc_layer(x)
        return x

Parameters

input_channels = 1 # Number of input channels (features)
output_size = 4 # Number of output classes or regression values
batch_size = 32
learning_rate = 0.001
num_epochs = 10
final_sequence_length = 10 # Adjust this according to your time series sequence length
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Initialize the model, loss function, and optimizer
model = TimeSeriesCNN(input_channels, output_size)
criterion = nn.MSELoss()  # You can change the loss function as needed
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for batch in dataloader:
        optimizer.zero_grad()
        outputs = model(batch.unsqueeze(1))  # Add the channel dimension
        loss = criterion(outputs, target)  # Replace 'target' with your target values
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


RuntimeError                              Traceback (most recent call last)
Cell In[36], line 10
      8 for batch in dataloader:
      9     optimizer.zero_grad()
---> 10     outputs = model(batch.unsqueeze(1))  # Add the channel dimension
     11     loss = criterion(outputs, target)  # Replace 'target' with your target values
     12     loss.backward()

File ~/anaconda3/envs/python310_pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[29], line 22, in TimeSeriesCNN.forward(self, x)
     20 x = self.conv_layer(x)
     21 x = x.view(x.size(0), -1)  # Flatten the tensor
---> 22 x = self.fc_layer(x)
     23 return x

File ~/anaconda3/envs/python310_pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/anaconda3/envs/python310_pytorch/lib/python3.10/site-packages/torch/nn/modules/container.py:217, in Sequential.forward(self, input)
    215 def forward(self, input):
    216     for module in self:
--> 217         input = module(input)
    218     return input

File ~/anaconda3/envs/python310_pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/anaconda3/envs/python310_pytorch/lib/python3.10/site-packages/torch/nn/modules/linear.py:114, in Linear.forward(self, input)
    113 def forward(self, input: Tensor) -> Tensor:
--> 114     return F.linear(input, self.weight, self.bias)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x288 and 320x64)

The error is raised in the first linear layer of self.fc_layer. Change its in_features to 288 and it should work.
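
If you don't want to work out the flattened size by hand, you can read it off a dummy forward pass through the conv stack; a small sketch (the sequence length of 42 is an assumption chosen to reproduce the 288 from your error):

import torch
import torch.nn as nn

conv_layer = nn.Sequential(
    nn.Conv1d(1, 16, kernel_size=3), nn.ReLU(), nn.MaxPool1d(2),
    nn.Conv1d(16, 32, kernel_size=3), nn.ReLU(), nn.MaxPool1d(2),
)
x = torch.randn(1, 1, 42)                 # dummy batch with one sequence
flat = conv_layer(x).view(1, -1).size(1)  # 32 channels * 9 steps = 288
print(flat)

Alternatively, newer PyTorch versions offer nn.LazyLinear(64), which infers its in_features during the first forward pass.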

Thanks! I solved it by directly changing

nn.Linear(28 * final_sequence_length, 64)

to

nn.Linear(288, 64)

I am getting the same error: mat1 and mat2 shapes cannot be multiplied (2x20 and 13x64)
Here is my code:

import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter

class GraphConvolution(nn.Module):
    def __init__(self, feature_numbers, out_numbers, bias=False) -> None:
        super().__init__()
        self.bias = bias
        self.w = Parameter(torch.FloatTensor(feature_numbers, out_numbers))
        if bias:
            self.b = Parameter(torch.FloatTensor(out_numbers))
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.w.size(1))
        self.w.data.uniform_(-stdv, stdv)
        if self.bias is not False:
            self.b.data.uniform_(-stdv, stdv)

    def forward(self, x, adj):
        support = torch.mm(x, self.w)
        out = torch.spmm(adj, support)
        if self.bias:
            out = out + self.b

        return out

class NodeClassificationGCNN(nn.Module):
    def __init__(self, feature_num, node_representation_dim, nclass, dropout=0.2, bias=False) -> None:
        super().__init__()
        self.gconv1 = GraphConvolution(feature_num, node_representation_dim, bias)
        self.gconv2 = GraphConvolution(node_representation_dim, nclass, bias)
        self.dropout = dropout

    def forward(self, x, adj):
        x = F.relu(self.gconv1(x, adj))
        x = F.dropout(x, self.dropout, self.training)
        x = F.relu(self.gconv2(x, adj))
        return F.log_softmax(x, dim=1)

dataset = [data]
batch_size = 40
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

Split the data into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

Define the training loop

def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)

    # Apply the mask to the correct tensors
    train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    train_mask[:X_train.shape[0]] = 1

    loss = F.nll_loss(out[train_mask], y_train)
    loss.backward()
    optimizer.step()

Train the model with more iterations

for epoch in range(1000):
    train()

Evaluate the updated model

model.eval()
out = model(x, edge_index)
pred = out.argmax(dim=1)

print("Predicted Labels:", pred)
print("Real Target Labels:", y)
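
The error message gives the needed hint here as well: the failing multiplication is torch.mm(x, self.w) in GraphConvolution, where mat1 (2x20) is your node feature matrix and mat2 (13x64) is self.w, so the first layer was built with feature_numbers=13 while the data provides 20 features per node. A sketch of the fix, assuming data.x holds the node features (the hidden size of 64 is read off your error, and the nclass expression is an assumption):

# Build the first layer from the actual feature dimension instead of a
# hard-coded value:
feature_num = data.x.size(1)  # 20, judging by the error message
model = NodeClassificationGCNN(feature_num=feature_num,
                               node_representation_dim=64,
                               nclass=int(data.y.max()) + 1)

Note also that forward computes torch.spmm(adj, support) and thus expects a (sparse) adjacency matrix, while train() calls model(data.x, data.edge_index); once the shapes match, you would still need to build an adjacency matrix from edge_index.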

Could you also help me to solve the same error?
RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x36 and 9216x9216)

Here is the model:

class AlexNet_cvs(nn.Module):
    def __init__(self) -> None:
        super(AlexNet_cvs, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(5, 96, kernel_size=10, stride=5, padding=0),   
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(96, 256, kernel_size=4, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=1),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((4, 9))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 4 * 9, 9216),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(9216, 762),
            nn.ReLU(inplace=True),
            nn.Linear(762, 20),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

The data and label examples look like this:

print(data)
print(data.shape)

tensor([[0.0000, 0.0000, 1.0000, …, 0.0184, 0.0348, 0.0492],
[0.0000, 0.0000, 1.0000, …, 0.0442, 0.0363, 0.0250],
[0.0000, 0.0000, 1.0000, …, 0.0564, 0.0388, 0.0295],
…,
[1.0000, 0.9606, 0.8394, …, 0.0093, 0.0152, 0.0153],
[1.0000, 0.9524, 0.8419, …, 0.0091, 0.0151, 0.0160],
[1.0000, 0.9305, 0.8363, …, 0.0093, 0.0146, 0.0157]])
torch.Size([50, 200])

print(label)
print(label.shape)

tensor([[ 178., 1878.],
[ 822., 1814.],
[1375., 2162.],
[1669., 2304.],
[2065., 2736.],
[2528., 2780.],
[2836., 3008.],
[3396., 3490.],
[4013., 3518.],
[ nan, nan],
[ nan, nan],
[ nan, nan],
[ nan, nan],
[ nan, nan],
[ nan, nan],
[ nan, nan],
[ nan, nan],
[ nan, nan],
[ nan, nan],
[ nan, nan]], dtype=torch.float64)
torch.Size([20, 2])

Here is the summary of the model:

I think there's a problem between "AdaptiveAvgPool2d-14" and "Dropout-15", but I don't know what exactly the problem is or how to solve it.

Please give me a solution or some advice.
I'm looking forward to your reply.

Change the in_features of nn.Linear(256 * 4 * 9, 9216), to 36 and it should work.
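
If you are unsure where such a mismatch comes from, printing the activation shape right before the classifier shows the real in_features; a quick sketch inside your existing forward:

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        print(x.shape)  # the second value is the in_features the first Linear needs
        x = self.classifier(x)
        return x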


Can someone help me?
I want to run a transformer for a regression case.
I'm getting: RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x1 and 4x3)

import torch

class TransformerModel(torch.nn.Module):
    def __init__(self, input_size, output_size, num_heads, num_layers, hidden_size, src_input_size=2, tgt_output_size=4):
        super(TransformerModel, self).__init__()

        self.src_embedding = torch.nn.Linear(src_input_size, hidden_size)
        self.tgt_embedding = torch.nn.Linear(tgt_output_size, hidden_size)
        self.src_input_size = src_input_size  # store src_input_size as an object attribute
        self.tgt_output_size = tgt_output_size  # store tgt_output_size as an object attribute

        # Create the transformer model
        self.transformer = torch.nn.Transformer(
            d_model=hidden_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            dropout=0.4
        )

        self.fc = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        
        x = x.view(x.size(0), -1)
        # Embed the input separately for src and tgt
        src = self.src_embedding(x[:, :self.src_input_size])  # using self.src_input_size
        tgt = self.tgt_embedding(x[:, self.src_input_size:])  # using self.tgt_output_size

        # Apply the transformer model
        x = self.transformer(src, tgt)

        # Apply a linear layer to get the output
        x = self.fc(x)

        return x

# Define model parameters
input_size = 3
output_size = 4
num_heads = 3
num_layers = 2
hidden_size = 3

# Create an instance of the TransformerModel
model = TransformerModel(input_size, output_size, num_heads, num_layers, hidden_size)
model.to(device)

And I also have this input:

[image]
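
Working from the error message: mat1 is 2x1 and mat2 is 4x3, and the only layer with in_features=4 is self.tgt_embedding, so the slice x[:, self.src_input_size:] carries a single column while the embedding expects 4. After flattening, the input would need src_input_size + tgt_output_size = 2 + 4 = 6 features, but it apparently has only 3 (your input_size). A sketch of a shape check before the embeddings (a debugging aid, not a fix):

def forward(self, x):
    x = x.view(x.size(0), -1)
    # the two embeddings together consume this many columns
    expected = self.src_input_size + self.tgt_output_size
    assert x.size(1) == expected, f"got {x.size(1)} features, expected {expected}"
    src = self.src_embedding(x[:, :self.src_input_size])
    tgt = self.tgt_embedding(x[:, self.src_input_size:])

Either reshape the input so it provides all six columns, or shrink src_input_size/tgt_output_size to match the features you actually have.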

Thank you so much! It helped me solve the problem,
but other problems keep coming... like:

RuntimeError: Given groups=1, weight of size [96, 5, 10, 10], expected input[1, 10, 50, 200] to have 5 channels, but got 10 channels instead

Anyway, let me ask it in another discussion.

Hello… I am a newbie in PyTorch. I am trying to build this model.
Model:

import torch
import torch.nn as nn

class CustomCNN(nn.Module):
    def __init__(self):
        super(CustomCNN, self).__init__()

        # Convolutional Layers
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3)  # Adapted for input size (1, 65)
        self.conv2 = nn.Conv1d(16, 16, kernel_size=3)
        self.conv3 = nn.Conv1d(16, 64, kernel_size=3)
        self.conv4 = nn.Conv1d(64, 64, kernel_size=3)
        self.conv5 = nn.Conv1d(64, 128, kernel_size=3)

        # Max Pooling Layers
        self.pool1 = nn.MaxPool1d(2)
        self.pool2 = nn.MaxPool1d(2)
        self.pool3 = nn.MaxPool1d(2)

        # Fully Connected Layers
        self.fc1 = nn.Linear(128 * 7, 64)  # The output shape is 896*64
        self.fc2 = nn.Linear(64, 2)  # 2 output classes (binary classification)
    
    def forward(self, x):
        # Convolutional layers
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = self.pool1(x)

        x = torch.relu(self.conv3(x))
        x = torch.relu(self.conv4(x))
        x = self.pool2(x)

        x = torch.relu(self.conv5(x))
        #x = torch.relu(self.conv6(x))
        x = self.pool3(x)

        # Flatten the output
        x = x.view(x.size(0), -1)

        # Fully connected layers
        x = torch.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))  # Sigmoid for binary classification

        return x

I got this error: mat1 and mat2 shapes cannot be multiplied (2x640 and 896x64)

My input: torch.Size([61784, 1, 65])

Please help me fix this, and if you find some weird code, please help me make it better. Thank you very much for your help.
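
Working through the conv arithmetic for an input of length 65: conv1 → 63, conv2 → 61, pool1 → 30, conv3 → 28, conv4 → 26, pool2 → 13, conv5 → 11, pool3 → 5. The flattened size is therefore 128 * 5 = 640 per sample, which matches the 2x640 in your error, so fc1 should be built for 640 features; a sketch of the fix:

# 128 channels * 5 remaining timesteps = 640 flattened features
self.fc1 = nn.Linear(128 * 5, 64)

As a side note, if you train this with nn.CrossEntropyLoss, drop the final sigmoid and return the raw logits from fc2, since the loss applies log_softmax internally.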

Hello everyone,
I am dealing with a multivariate time series forecasting problem. My loaded data has batch = 64, sequence = 168, features = 7.

import torch
import torch.nn as nn

class CNN_ForecastNet(nn.Module):
    def __init__(self):
        super(CNN_ForecastNet, self).__init__()
        self.conv1d = nn.Conv1d(168, 200, kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.drop_out = nn.Dropout(0.5)
        self.max_pooling = nn.MaxPool1d(1)
        self.fc1 = nn.Linear(200, 100)
        self.fc2 = nn.Linear(100, 1)

    def forward(self, x):
        x = self.conv1d(x)
        x = self.relu(x)
        x = self.drop_out(x)
        x = self.max_pooling(x)
        x = x.view(x.size(1), -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)

        return x

model = CNN_ForecastNet()

optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)
criterion = nn.MSELoss()

The raised error is: RuntimeError: mat1 and mat2 shapes cannot be multiplied (200x448 and 200x100)

I am new to PyTorch, please help me with what is wrong.

This line:

x = x.view(x.size(1) , -1)

looks wrong and I assume you want to keep the batch dimension while flattening the rest via:

x = x.view(x.size(0), -1)
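
For reference, that line is also where the 200x448 in your error comes from: with an input of [64, 168, 7], self.conv1d produces [64, 200, 7], and view(x.size(1), -1) folds the batch into the feature dimension. A quick sketch that reproduces both shapes (the input layout is assumed from your description):

import torch
import torch.nn as nn

x = nn.Conv1d(168, 200, kernel_size=1)(torch.randn(64, 168, 7))  # -> [64, 200, 7]
print(x.view(x.size(1), -1).shape)  # torch.Size([200, 448]) -- the bad flatten
print(x.view(x.size(0), -1).shape)  # torch.Size([64, 1400]) -- batch preserved

Note that after the fix, fc1's in_features has to match the flattened size per sample (1400 here, or 200 if the temporal dimension is 1).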

Even after I changed it, the error is still raised:

class CNN_ForecastNet(nn.Module):
    def __init__(self):
        super(CNN_ForecastNet,self).__init__()
        self.conv1d = nn.Conv1d(50,200,kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.drop_out = nn.Dropout(0.5)
        self.max_pooling = nn.MaxPool1d(1)
        self.fc1 = nn.Linear(200,100)
        self.fc2 = nn.Linear(100,1)
        
    def forward(self,x):
        x = self.conv1d(x)
        x = self.relu(x)
        x = self.drop_out(x)
        x = self.max_pooling(x)
        x = x.view(x.size(0),-1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        
        return x

the error is

Traceback (most recent call last):

  Cell In[234], line 43
    Train()

  Cell In[234], line 11 in Train
    preds = model(inputs.float())

  File ~\anaconda3\Lib\site-packages\torch\nn\modules\module.py:1501 in _call_impl
    return forward_call(*args, **kwargs)

  Cell In[233], line 17 in forward
    x = self.fc1(x)

  File ~\anaconda3\Lib\site-packages\torch\nn\modules\module.py:1501 in _call_impl
    return forward_call(*args, **kwargs)

  File ~\anaconda3\Lib\site-packages\torch\nn\modules\linear.py:114 in forward
    return F.linear(input, self.weight, self.bias)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (200x448 and 200x100)

This time we tried with a sequence length of 50.

I cannot reproduce the issue using:

model = CNN_ForecastNet()
x = torch.randn(64, 50, 1)
out = model(x)

so you might need to adapt the in_features of the first linear layer if your input shape changes.

Many thanks, it is resolved now.

Hello everyone, I am dealing with multivariate time series forecasting using Transformers.
Below is my code, step by step:

After some preprocessing and windowing time series dataset …

1 - Creating the mask function

input_sequence_length = 10   # encoder input sequence length
target_sequence_length = 5   # decoder input sequence length

tgt_mask = generate_square_subsequent_mask(
    dim1=target_sequence_length,
    dim2=target_sequence_length
   )
src_mask = generate_square_subsequent_mask(
    dim1=target_sequence_length,
    dim2=input_sequence_length
   )

2 - Positional Encoding

import math
import torch
import torch.nn as nn
from torch import Tensor

class PositionalEncoder(nn.Module):
    def __init__(self, dropout: float = 0.1, 
        max_seq_len: int = 5000, d_model: int = 512,device = device):

        super().__init__()

        self.d_model = d_model
        self.dropout = nn.Dropout(p=dropout)
        self.batch_first = True  # Assuming batch_first is always True

        position = torch.arange(max_seq_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))

        pe = torch.zeros(1, max_seq_len, d_model)
        pe[0, :, 0::2] = torch.sin(position * div_term)
        pe[0, :, 1::2] = torch.cos(position * div_term)

        self.register_buffer('pe', pe)
        
    def forward(self, x: Tensor) -> Tensor:
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

3 - Creating the Transformer encoder and decoder with PyTorch


class TimeSeriesTransformer(nn.Module):

    def __init__(self, 
        input_size: int,
        dec_seq_len: int,
        out_seq_len: int= 5, # target_sequence_length
        dim_val: int=512,  
        n_encoder_layers: int=2,
        n_decoder_layers: int=2,
        n_heads: int=4,
        dropout_encoder: float=0.2, 
        dropout_decoder: float=0.2,
        dropout_pos_enc: float=0.1,
        dim_feedforward_encoder: int=512,
        dim_feedforward_decoder: int=512,
        num_predicted_features: int=1
        ): 

        super().__init__() 

        self.dec_seq_len = dec_seq_len

        self.encoder_input_layer = nn.Linear(
            in_features=input_size, 
            out_features=dim_val 
            )

        self.decoder_input_layer = nn.Linear(
            in_features=num_predicted_features,
            out_features=dim_val
            )  
        
        self.linear_mapping = nn.Linear(
            in_features=dim_val, 
            out_features=num_predicted_features
            )

        # Create positional encoder
        self.positional_encoding_layer = PositionalEncoder(
            d_model=dim_val,
            dropout=dropout_pos_enc
            )

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=dim_val, 
            nhead=n_heads,
            dim_feedforward=dim_feedforward_encoder,
            dropout=dropout_encoder,
            batch_first=True
            )

        self.encoder = nn.TransformerEncoder(
            encoder_layer=encoder_layer,
            num_layers=n_encoder_layers, 
            norm=None
            )

        decoder_layer = nn.TransformerDecoderLayer(
            d_model=dim_val,
            nhead=n_heads,
            dim_feedforward=dim_feedforward_decoder,
            dropout=dropout_decoder,
            batch_first=True
            )

        self.decoder = nn.TransformerDecoder(
            decoder_layer=decoder_layer,
            num_layers=n_decoder_layers, 
            norm=None
            )

    def forward(self, src: Tensor, tgt: Tensor, src_mask: Tensor=None, 
                tgt_mask: Tensor=None) -> Tensor:

        src = self.encoder_input_layer(src) 
      
        src = self.positional_encoding_layer(src) 
        src = self.encoder(src=src)
        
        decoder_output = self.decoder_input_layer(tgt)
        decoder_output = self.decoder(
            tgt=decoder_output,
            memory=src,
            tgt_mask=tgt_mask,
            memory_mask=src_mask
            )
        decoder_output = self.linear_mapping(decoder_output) 
        
        return decoder_output

4 - Model

model = TimeSeriesTransformer(
    input_size=7,
    dec_seq_len=5,
    num_predicted_features=1,
    ).to(device)

5 - Creating the loader (created earlier in the preprocessing step)

i, batch = next(enumerate(train_loader))
src, trg, trg_y = batch
src = src.to(device)      # shape [5, 10, 7]: batch size, encoder sequence len, number of features
trg = trg.to(device)      # shape [5, 5, 7]: batch size, decoder sequence len, number of features
trg_y = trg_y.to(device)  # shape [5, 5, 1]: batch size, output sequence len, number of predicted features

6 - Output of the model

output = model(
    src=src,
    tgt=trg,
    src_mask=src_mask,
    tgt_mask=tgt_mask
    )

7 - Finally, the raised error:

output = model(
    src=src,
    tgt=trg,
    src_mask=src_mask,
    tgt_mask=tgt_mask
    )
Traceback (most recent call last):

  Cell In[348], line 1
    output = model(

  File C:\ProgramData\anaconda3\Lib\site-packages\torch\nn\modules\module.py:1518 in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)

  File C:\ProgramData\anaconda3\Lib\site-packages\torch\nn\modules\module.py:1527 in _call_impl
    return forward_call(*args, **kwargs)

  Cell In[344], line 80 in forward
    decoder_output = self.decoder_input_layer(tgt)

  File C:\ProgramData\anaconda3\Lib\site-packages\torch\nn\modules\module.py:1518 in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)

  File C:\ProgramData\anaconda3\Lib\site-packages\torch\nn\modules\module.py:1527 in _call_impl
    return forward_call(*args, **kwargs)

  File C:\ProgramData\anaconda3\Lib\site-packages\torch\nn\modules\linear.py:114 in forward
    return F.linear(input, self.weight, self.bias)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (25x7 and 1x512)

Any assistance will be greatly appreciated. Thanks to all!
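
The shapes in your traceback point at self.decoder_input_layer: it was built with in_features=num_predicted_features=1, but tgt is your trg tensor of shape [5, 5, 7], so F.linear sees mat1 of (5*5)x7 against the layer's 1x512 weight. Either construct the model with num_predicted_features equal to the number of features in the decoder input, or feed the decoder only the feature you want to predict; a sketch of the latter (which column is the target feature is an assumption):

trg_in = trg[..., 0:1]  # keep only the predicted feature -> [5, 5, 1]
output = model(src=src, tgt=trg_in, src_mask=src_mask, tgt_mask=tgt_mask)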