RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x13056 and 153600x2048)

It seems your input contains 31 features while the first linear layer (self.fc1) expects an input with 30 features.
You could fix it by changing the in_features argument of self.fc1 or by making sure the input contains 30 features.

1 Like

Hi , i have same error RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 768x32)
th output

Hope you can help @ptrblck
the model notebook
[Google Colab](https://POP model)

Your notebook doesn’t show any code besides executing a Python script from another folder.
However, the error:

   word_embeddings = self.dropout(self.fc(word_embeddings))
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 768x32)

points to a shape mismatch in self.fc which expects 768 input features while the activation only contains 1 feature.

here is the code
https://github.com/HumaticsLAB/POP-Mining-POtential-Performance
how can i solve it ?

Check the aforementioned layer specs and the corresponding input activation as described before.
In case you get stuck, please create a minimal and executable code snippet to reproduce the issue.

sorry i don’t get it, can you explain more?

Hi, I get the same run time error with my code (mat1 and mat2 shapes cannot be multiplied (1x25088 and 15x10)):

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

class NetMLP(torch.nn.Module):
    def __init__(self, input_features, size_hidden, n_output):
        super(NetMLP, self).__init__()
        self.hidden1 = torch.nn.Linear(input_features, size_hidden, bias=True, device=None, dtype=None)
        self.hidden2 = torch.nn.Linear(size_hidden, size_hidden, bias=True, device=None, dtype=None) 
        self.out = torch.nn.Linear(size_hidden, n_output, bias=True, device=None, dtype=None) 

    def forward(self, x):
        x = x.view(-1,)
        x = self.out(x)       # linear output
        return x
input_features = 28*28
hidden_neurons = 15
num_outputs = 10

batch_size = 50

learning_rate = 0.0001

device = torch.device('cuda:0')

net = NetMLP(input_features, hidden_neurons, num_outputs)
net.to(device)

optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

criterion = nn.CrossEntropyLoss()

loader = DataLoader(dataset=mnist_train, batch_size=32, shuffle=True)

num_epochs = 5

loss_list = []

for i in range(num_epochs):

    for x, y in loader:
        optimizer.zero_grad()

        x = x.to(device)
        y = y.to(device)

        loss = criterion(net(x), y)
        
        loss.backward()
        
        optimizer.step()

        loss_list.append(loss.data.item())
  
    #if i > 0 and i % 100 == 0:
    print('Epoch %d, loss = %g' % (i, loss))
```´

Many thanks!

In your model you are using only the last linear layer via self.out while I would guess self.hidden1 and self.hidden2 should also be used.
Additionally, you are flattening the input tensor and are mixing the batch with the feature dimension in:

x = x.view(-1)

Take a look at this tutorial to see how a simple neural network is implemented and used.

Hi I am a newbie, Even I have a similar error. `import torch

import torch.nn as nn
import torch.nn.functional as F
class Auxiliary(nn.Module):
    def __init__(self, input_channels, n_classes):
        super(Auxiliary, self).__init__()
        self.Conv2 = nn.Conv2d(input_channels, 128, kernel_size=1)
        self.FC1 = nn.Linear(2048, 1024)
        self.FC2 = nn.Linear(1024, n_classes)
    def forward(self, x):
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        x = F.adaptive_avg_pool2d(x, (4, 4))
        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        x = self.Conv2(x)
        # N x 128 x 4 x 4
        x = x.view(x.size(0), -1)
        # N x 2048
        x = F.relu(self.FC1(x), inplace=True)
        # N x 2048
        x = F.dropout(x, 0.7, training=self.training)
        # N x 2048
        x = self.FC2(x)
        # N x 1024
        return x


class Inception(nn.Module):
    def __init__(self, input_channels, n1x1, n3x3_reduce, n3x3, n5x5_reduce, n5x5, pool_proj):
        super(Inception, self).__init__()


        # 1x1conv branch
        self.inception_1 = nn.Sequential(
            nn.Conv2d(input_channels, n1x1, kernel_size=1)
        )

        self.inception_2 = nn.Sequential(
            nn.Conv2d(input_channels, n3x3_reduce, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(n3x3_reduce, n3x3, kernel_size=1),
            nn.ReLU(inplace=True)
        )

        self.inception_3 = nn.Sequential(
            nn.Conv2d(input_channels, n5x5_reduce, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(n5x5_reduce, n5x5, kernel_size=1),
            nn.ReLU(inplace=True)
        )

        self.inception_4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            nn.Conv2d(input_channels, pool_proj, kernel_size=1),
            nn.ReLU(inplace=True)
        )


    def forward(self, x):
        return torch.cat((self.inception_1(x),self.inception_2(x),self.inception_3(x),self.inception_4(x)), dim=1)




class Net(nn.Module):
    def __init__(self, n_classes):
        super(Net, self).__init__()

        self.pre_layer = nn.Sequential(
            # N x 3 x 224 x 224
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # N x 64 x 112 x 112
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            # N x 64 x 56 x 56
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # N x 64 x 56 x 56
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
            # N x 192 x 56 x 56
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True),
        )

        # N x 192 x 28 x 28
        self.inception_3a = Inception(192, 64, 96, 128, 16, 32, 32)
        # N x 256 x 28 x 28
        self.inception_3b = Inception(256, 128, 128, 192, 32, 96, 64)
        # N x 480 x 28 x 28
        self.maxPool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # N x 480 x 14 x 14
        self.inception_4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.aux1 = Auxiliary(512, n_classes)
        # N x 512 x 14 x 14
        self.inception_4b = Inception(512, 160, 112, 224, 24, 64, 64)
        # N x 512 x 14 x 14
        self.inception_4c = Inception(512, 128, 128, 256, 24, 64, 64)
        # N x 512 x 14 x 14
        self.inception_4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.aux2 = Auxiliary(528, n_classes)
        # N x 528 x 14 x 14
        self.inception_4e = Inception(528, 256, 160, 320, 32, 128, 128)
        # N x 832 x 14 x 14
        self.maxPool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # N x 832 x 7 x 7
        self.inception_5a = Inception(832, 256, 160, 320, 32, 128, 128)
        # N x 832 x 7 x 7
        self.inception_5b = Inception(832, 384, 192, 384, 48, 128, 128)
        # N x 1024 x 7 x 7
        self.avgPool5 = nn.AvgPool2d(kernel_size=7, stride=1)

        # N x 1024 x 1 x 1
        self.dropout = nn.Dropout(p=0.4)
        # N x 1024
        self.linear = nn.Linear(in_features=1024, out_features=n_classes)



    def forward(self, x, mode=True):
        x = self.pre_layer(x)

        x = self.inception_3a(x)
        x = self.inception_3b(x)
        x = self.maxPool3(x)

        x = self.inception_4a(x)

        if mode:
            aux1 = self.aux1(x)
        else:
            aux1 = None

        x = self.inception_4b(x)
        x = self.inception_4c(x)
        x = self.inception_4d(x)

        if mode:
            aux2 = self.aux2(x)
        else:
            aux2 = None

        x = self.inception_4e(x)
        x = self.maxPool4(x)

        x = self.inception_5a(x)
        x = self.inception_5b(x)

        x = self.avgPool5(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.linear(x)
        x = F.softmax(x, dim=1)

        return x, aux1, aux2

`

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-123-6378d8f2f25e> in <module>
     11         labels=labels.to(device)
     12         total+=labels.size(0)
---> 13         output,aux1,aux2=model(images)
     14         aux1_loss=criterion(aux1, labels)
     15         aux2_loss=criterion(aux2, labels)

3 frames
/usr/local/lib/python3.9/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    112 
    113     def forward(self, input: Tensor) -> Tensor:
--> 114         return F.linear(input, self.weight, self.bias)
    115 
    116     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x16384 and 1024x2)`
type or paste code here

I guess self.linear is raising the error in Net so you should also check the shape of its input activation in Net.forward and change the in_features to 16384 as mentioned in previous posts.

Also, assuming you are working on a multi-class classification use case using nn.CrossEntropyLoss as the criterion remove the last F.softmax activation as raw logits are expected.

1 Like
################## DNN ##################
class BeamsNetV1(nn.Module):
    def __init__(self):
        super(BeamsNetV1, self).__init__()
        self.conv_layer = nn.Sequential(
            nn.Conv1d(in_channels=3, out_channels=6,
                      kernel_size=2, stride=1),
            nn.Tanh(),
        )
        self.ConvToFc = nn.Sequential(
            nn.Linear(1188, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 2),
            nn.ReLU(),
        )
        self.FC_output = nn.Sequential(
            nn.Linear(4 + 2, 3),
        )
        self.initialize_weights()

    def forward(self, x1, x2, y):
        x1 = self.conv_layer(x1)
        x2 = self.conv_layer(x2)
        x1 = torch.flatten(x1, 1)
        x2 = torch.flatten(x2, 1)
        x = torch.column_stack((x1, x2))
        x = F.dropout(x, p=0.2)
        x = self.ConvToFc(x)
        x = torch.column_stack((x, y))
        x = self.FC_output(x)
        return x

    def initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_uniform_(m.weight)
            if isinstance(m, nn.Linear):
                nn.init.kaiming_uniform_(m.weight)

when i try to print the model.(inputs11,inputs12,input2) I am getting mat1 and mat2 shapes cannot be multiplied (2x239988 and 1188x512)

The error is raised in the first linear layer in self.ConvToFc as the number of input features does not match since the incoming activation has 239988 features while 1188 are expected. Change the in_features value of the first linear layer to 239988 or reduce the size of the input activation by using e.g. pooling layers and it should work.

I have 1188x512 parameter sized pickle file to be used for trained model. I dont have access to pickle file. How do I solve that?

If you cannot change the linear layer since you are loading pretrained parameters you might then need to create a matching activation e.g. via an adaptive pooling layer.

Hi, I’m appreciate to your help. can you check my code with this error for me? (
RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x18433 and 6400x4098))

class AlexNet(nn.Module):
def init(self, num_classes=10):
super(AlexNet, self).init()

    self.layer1 = nn.Sequential(
        nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
        nn.BatchNorm2d(96),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 3, stride = 2))
    self.layer2 = nn.Sequential(
        nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 3, stride = 2))
    self.layer3 = nn.Sequential(
        nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(384),
        nn.ReLU())
    self.layer4 = nn.Sequential(
        nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(384),
        nn.ReLU())
    self.layer5 = nn.Sequential(
        nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size = 3, stride = 2)) 
    
    # Calculate the correct input shape for the fully-connected layer using a dummy input
    dummy_input = torch.randn(1, 3, 224, 224)
    dummy_output = self.layer5(self.layer4(self.layer3(self.layer2(self.layer1(dummy_input)))))
    fc_input_shape = dummy_output.numel()

    self.fc = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(fc_input_shape, 4096 + 2),
        nn.ReLU())
    self.fc1 = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(4096 + 2, 4096 + 2),
        nn.ReLU())
    self.fc2 = nn.Sequential(
        nn.Linear(4096 + 2, num_classes))

def forward(self, x):
    out = self.layer1(x)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    out = self.layer5(out)
    out = out.reshape(out.size(0), -1)
    input_added_neurons = self.add_neurons(out)
    out = self.fc(input_added_neurons)
    out = self.fc1(out)
    out = self.fc2(out)
    return out

Based on your code it seems you are using a dummy forward pass on the model’s __init__ method while you could also just directly use nn.Lazy* modules which would calculate the input feature size for you using the first forward pass.
In any case I don’t know how add_neurons is defined and would guess it’s increasing the feature space thus creating the shape mismatch.

I am facing same issue
RuntimeError: mat1 and mat2 shapes cannot be multiplied (4x40 and 12098x25)

def create_mfcc (audio, sr):
    n_frames = int(sr*1.2)
    start_frame = int(len(audio) // 2 - n_frames // 2)
    end_frame = start_frame + n_frames
    audio_cut = audio[start_frame:end_frame]
    feature = np.mean(librosa.feature.mfcc(y=audio_cut,sr=sr,n_mfcc=40).T,axis=0)
    return feature

def create_origin_mfcc (data_path):
    S, sr = librosa.load(data_path)
    feature = create_mfcc(S,sr)
    return feature

def create_noise_mfcc (data_path):
    S, sr = librosa.load(data_path)
    noise_amp = 0.035*np.random.uniform()*np.amax(S)
    S = S + noise_amp*np.random.normal(size=S.shape[0])
    feature = create_mfcc(S,sr)
    return feature

def create_pitch_mfcc (data_path):
    S, sr = librosa.load(data_path)
    S = librosa.effects.pitch_shift(y=S, sr=sr, n_steps=0.7)
    feature = create_mfcc(S,sr)
    return feature

def create_stretch_mfcc (data_path):
    S, sr = librosa.load(data_path)
    S = librosa.effects.time_stretch(y=S, rate=0.85)
    feature = create_mfcc(S,sr)
    return feature

class Data(Dataset):
  def __init__(self, X_train, y_train):
    # need to convert float64 to float32 else 
    # will get the following error
    # RuntimeError: expected scalar type Double but found Float
    self.X = torch.from_numpy(X_train.numpy().astype(np.float32))
    # need to convert float64 to Long else 
    # will get the following error
    # RuntimeError: expected scalar type Long but found Float
    self.y = y_train.type(torch.LongTensor)
    self.len = self.X.shape[0]
  
  def __getitem__(self, index):
    return self.X[index], self.y[index]
  def __len__(self):
    return self.len

    

# Define the neural network model

class Network(nn.Module):
  def __init__(self,input_dim,hidden_layers,output_dim):
    super(Network, self).__init__()
    self.linear1 = nn.Linear(input_dim, hidden_layers)
    self.linear2 = nn.Linear(hidden_layers, output_dim)
  def forward(self, x):
    x = torch.sigmoid(self.linear1(x))
    x = x.view(x.size(0),-1)
    x = self.linear2(x)
    return x

data = []
path_to_data = '/data/ganji_sreeram/Interns/Vipendra:Emotion_Recognition/Wav2vec2_Emotion/Resampled-11-Emotion-Data'
for path in tqdm(Path(path_to_data).glob("**/*.wav")):
    name = str(path).split('/')[-1].split('.')[0]
    label = str(path).split('.')[-2].split('/')[-2]
    try:
        # There are some broken files
        
        s = torchaudio.load(path)
        
        data.append({
            "name": name,
            "path": path,
            "emotion": label
        })
    except Exception as e:
        # print(str(path), e)
        print(e)
        pass

            # break
df = pd.DataFrame(data)
# Add a new column "status" to the DataFrame indicating if the path exists or not
df["status"] = df["path"].apply(lambda path: True if os.path.exists(path) else None)
# Drop rows where the "path" column is missing or invalid
df = df.dropna(subset=["path"])
# Drop the "status" column from the DataFrame
df = df.drop(labels="status", axis=1)
# Print the length of the DataFrame after the initial filtering step
print(f"Step 1: {len(df)}")
# Shuffle the DataFrame randomly
df = df.sample(frac=1)
# Reset the index of the DataFrame after shuffling
df = df.reset_index(drop=True)
df.head()
# Print the unique labels in the "emotion" column of the DataFrame
print("Labels: ", df["emotion"].unique())
# Count the number of paths for each emotion category
df.groupby("emotion").count()[["path"]]

origin = df['path'].apply(lambda x:create_origin_mfcc(x))
save_path = "/data/ganji_sreeram/Interns/Vipendra:Emotion_Recognition/Wav2vec2_Emotion/NNclassifier/Data/CSV"
# Split the DataFrame into train and test sets
train_df, test_df = train_test_split(df, test_size=0.19, random_state=101, stratify=df["emotion"])
# Reset the index of the train and test DataFrames
train_df = train_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)
# Save the train DataFrame to a CSV file
train_df.to_csv(f"{save_path}/train.csv", sep="\t", encoding="utf-8", index=False)
# Save the test DataFrame to a CSV file
test_df.to_csv(f"{save_path}/test.csv", sep="\t", encoding="utf-8", index=False)
# Print the shape of the train and test DataFrames
print(f"train.csv contains {train_df.shape}")
print(f"test.csv contains {test_df.shape}")
# Return the paths of the train.csv and test.csv files
        
ohe= OneHotEncoder()
y1 = ohe.fit_transform( df[['emotion']] )
y1 = y1.toarray()
y2 = ohe.fit_transform( df[['emotion']] )
y2 = y2.toarray()
y3 = ohe.fit_transform( df[['emotion']] )
y3 = y3.toarray()
y4 = ohe.fit_transform( df[['emotion']] )
y4 = y4.toarray()
mfcc_data = np.array(origin)
y = np.concatenate([y1,y2,y3,y4])
y = np.array(y1)
X = [x for x in mfcc_data]
X = np.array(X)
X = torch.from_numpy(X)

X.shape 
y_label = list ()
for i in range(len(y)):
    if y[i][0] > 0.5:
            y_label.append (0)
    elif y[i][1] > 0.5:
            y_label.append (1)
    elif y[i][2] > 0.5:
            y_label.append (2)
    elif y[i][3] > 0.5:
           y_label.append (3)
    elif y[i][4] > 0.5:
            y_label.append (4)
    elif y[i][5] > 0.5:
            y_label.append (5)
    elif y[i][6] > 0.5:
            y_label.append (6)
    elif y[i][7] >0.5:
            y_label.append (7)
    elif y[i][8] >0.5:
            y_label.append (8)
    elif y[i][9] >0.5:
            y_label.append (9)
    elif y[i][10] >0.5:
            y_label.append (10)
           
        
y_trans = np.array (y_label)
y_trans = torch.from_numpy(y_trans)
X_train,X_test,y_train,y_test = train_test_split(X,y_trans,test_size=0.17,random_state=42)
batch_size = 4
traindata = Data(X_train, y_train)
trainloader = DataLoader(traindata, batch_size=batch_size,shuffle=True, num_workers=2)
input_dim = X.shape[0]
output_dim = 11
hidden_layers=25
clf = Network(input_dim,hidden_layers,output_dim)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(clf.parameters(), lr=0.0001)
# Set the hyperparameters


epochs = 2
for epoch in range(epochs):
  running_loss = 0.0
  for i, data in enumerate(trainloader, 0):
    inputs, labels = data
    # set optimizer to zero grad to remove previous epoch gradients
    optimizer.zero_grad()
    # forward propagation
    outputs = clf(inputs)
    loss = criterion(outputs, labels)
    # backward propagation
    loss.backward()
    # optimize
    optimizer.step()
    running_loss += loss.item()
  # display statistics
  print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.5f}')

This code:

input_dim = X.shape[0]
clf = Network(input_dim,hidden_layers,output_dim)

looks a bit strange as you are using X.shape[0] for the input feature dimension while dim0 is usually the batch dimension.
Could you check if 12098 is representing the number of samples and if X.shape[1] should be used instead?

ohk , yeah I got your point ,12098 is samples, so features will be in x.shape[1].
Thanks for answer

I have the same pb : RuntimeError: mat1 and mat2 shapes cannot be multiplied (2688x224 and 25088x4096)
and I really don’t know where it’s came from…
I use data cats and dogs from here : Dogs & Cats Images | Kaggle

and a classic VGG16

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
composed_transforms = transforms.Compose([transforms.Resize(224),transforms.CenterCrop(224),transforms.ToTensor(), normalize])

data_dir = ‘…/datasets/dogscats2/’
dataset = {x: datasets.ImageFolder(os.path.join(data_dir, x), transform=composed_transforms)
for x in [‘training_set’]}

dataset_sizes = {x: len(dataset) for x in [‘training_set’]}
print(dataset_sizes)
{‘training_set’: 8000}

dataset[‘training_set’][1][0].shape
torch.Size([3, 224, 224])

class MyFirstNet(nn.Module):

def __init__(self):
    super(MyFirstNet, self).__init__()
    self.categories = 1000 #Nombre de catégories ImageNet
    self.block1 = nn.Sequential(
        nn.Conv2d(in_channels = 3, out_channels = 64, kernel_size = 3, padding = 1),
        nn.ReLU(inplace = True),
        nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 3, padding = 1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size = 2, stride = 2),
    )
    self.block2 = nn.Sequential(
        nn.Conv2d(in_channels = 64, out_channels = 128, kernel_size = 3, padding = 1),
        nn.ReLU(inplace = True),
        nn.Conv2d(in_channels = 128, out_channels = 128, kernel_size = 3, padding = 1),
        nn.ReLU(inplace = True),
        nn.MaxPool2d(kernel_size = 2, stride = 2),
    )
    self.block3 = nn.Sequential(
        nn.Conv2d(in_channels = 128, out_channels = 256, kernel_size = 3, padding = 1),
        nn.ReLU(inplace = True),
        nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 3, padding = 1),
        nn.ReLU(inplace = True),
        nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 3, padding = 1),
        nn.ReLU(inplace = True),
        nn.MaxPool2d(kernel_size = 2, stride = 2),
    )
    self.block4 = nn.Sequential(
        nn.Conv2d(in_channels = 256, out_channels = 512, kernel_size = 3, padding = 1),
        nn.ReLU(inplace = True),
        nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = 1),
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = 1),
        nn.ReLU(inplace = True),
        nn.MaxPool2d(kernel_size = 2, stride = 2),
    )
    self.block5 = nn.Sequential(
        nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = 1),
        nn.ReLU(inplace = True),
        nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = 1),
        nn.ReLU(inplace = True),
        nn.Conv2d(in_channels = 512, out_channels = 512, kernel_size = 3, padding = 1),
        nn.ReLU(inplace = True),
        nn.MaxPool2d(kernel_size = 2, stride = 2),
    )
    self.classify = nn.Sequential(
        nn.Linear(in_features = 512 * 7 * 7, out_features = 4096),
        nn.ReLU(inplace = True),
        nn.Dropout(),
        nn.Linear(in_features = 4096, out_features = 4096),
        nn.ReLU(inplace = True),
        nn.Dropout(),
        nn.Linear(in_features = 4096, out_features = 2)
    )       

def forward(self, x):
    x = self.block1(x)
    x = self.block2(x)
    x = self.block3(x)
    x = self.block4(x)
    x = self.block5(x)
    x = x.view(x.size(0), -1) 
    x = x.view(x.size(0), -1)
    x = self.classify(x)
    return x

model = MyFirstNet()
from torch.utils import model_zoo
params_pre = model_zoo.load_url(‘https://download.pytorch.org/models/vgg16-397923af.pth’)

def give_params_to_model(network, params) :
i = 0
for _, child in enumerate(network.children()):
for _, grand_child in enumerate(child.children()):
classname = grand_child.class.name
if classname.find(‘Conv’) != -1:
grand_child.weight.data = params[‘features.’+str(i)+‘.weight’]
grand_child.bias.data = params[‘features.’+str(i)+‘.bias’]
if classname.find(‘Linear’) != -1:
grand_child.weight.data = params[‘classifier.’+str(i-31)+‘.weight’]
grand_child.bias.data = params[‘classifier.’+str(i-31)+‘.bias’]
#print(petit-enfant,‘classifier.’+str(i-31))
i+=1

for param in model.parameters():
param.requires_grad = False

learning_rate = 1e-4
optimizer_mynet = torch.optim.SGD(model.classify[6].parameters(),lr = learning_rate)

def trainer(model,dataloader, num_epochs,optimizer=None,train=True):
sizes = {‘training_set’: 8000, ‘valid’: 2000}
if train:
model.train()
phase=‘training_set’
else:
model.eval()
phase=‘valid’
print(“La phase est {}”.format(phase))
for epoch in range(num_epochs):
total = 0
running_loss = 0.0
running_corrects = 0
batch_counter = 1
for inputs,classes in dataloader[phase]:
outputs = model(inputs)
loss_function = nn.CrossEntropyLoss()
loss = loss_function(outputs,classes)
optimizer = optimizer
optimizer.zero_grad()
if train:
if optimizer is None:
raise ValueError(‘Pass optimizer for train mode’)
loss.backward()
optimizer.step()
_,preds = torch.max(outputs.data,1)
running_loss += loss.data.item()
running_corrects += torch.sum(preds == classes.data)
print(“Cumulated loss of the " + str(batch_counter) + " first batches: {}”.format(running_loss))
batch_counter += 1
#depend de la version py
epoch_loss = float(running_loss)
epoch_acc = float(running_corrects)/sizes[phase]
print(‘Loss: {:}, Acc: {:}’.format(epoch_loss, epoch_acc))

trainer(model = model.classify, dataloader=dataset_loader,
num_epochs = 10, optimizer=optimizer_mynet,train=True)

what is wrong in my code ?