Loss different between the pytorch and keras

Hi everyone!
I am newbie for PyTorch.
i try to rewrite my network from keras to pytorch. keras can decrease loss to 500 but pytorch stuck at 1000.
[keras]
Seq_deepCpf1_Input_SEQ = Input(shape=(34, 4))
Seq_deepCpf1_C1 = Convolution1D(80, 5, activation=‘relu’)(Seq_deepCpf1_Input_SEQ)
Seq_deepCpf1_P1 = AveragePooling1D(2)(Seq_deepCpf1_C1)
Seq_deepCpf1_F = Flatten()(Seq_deepCpf1_P1)
Seq_deepCpf1_DO1 = Dropout(0.3)(Seq_deepCpf1_F)
Seq_deepCpf1_D1 = Dense(80, activation=‘relu’)(Seq_deepCpf1_DO1)
Seq_deepCpf1_DO2 = Dropout(0.3)(Seq_deepCpf1_D1)
Seq_deepCpf1_D2 = Dense(40, activation=‘relu’)(Seq_deepCpf1_DO2)
Seq_deepCpf1_DO3 = Dropout(0.3)(Seq_deepCpf1_D2)
Seq_deepCpf1_D3 = Dense(40, activation=‘relu’)(Seq_deepCpf1_DO3)
Seq_deepCpf1_DO4 = Dropout(0.3)(Seq_deepCpf1_D3)
Seq_deepCpf1_Output = Dense(1, activation=‘linear’)(Seq_deepCpf1_DO4)
Seq_deepCpf1 = Model(inputs=[Seq_deepCpf1_Input_SEQ], outputs=[Seq_deepCpf1_Output])
print(Seq_deepCpf1.summary())

import keras
Seq_deepCpf1.compile(optimizer=keras.optimizers.Adam(lr=0.005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0),
                    loss='mse')
Seq_deepCpf1.fit(x=SEQ, y=indel_f,epochs=50)

[pytorch]
class Regression(nn.Module):
def init(self):
super(Regression, self).init()
self.conv1d = nn.Conv1d(4, 80, 5, 1) # 进去4通道出来80通道 (30,80)
self.relu = nn.ReLU()
self.avg1d = nn.AvgPool1d(2) # size of window 2 (15,80)
self.flatten = nn.Flatten()
self.dropout = nn.Dropout(p=0.3)
self.linear1200_80 = nn.Linear(80 * 15, 80)
self.linear80_40 = nn.Linear(80, 40) #(None, 40)
self.linear40_40 = nn.Linear(40, 40) # (None, 40)
self.linear40_1 = nn.Linear(40, 1) # (None, 40)

def forward(self, x):
    outconv1d = self.conv1d(x) # 进去4通道出来80通道 (30,80)
    outact = self.relu(outconv1d)
    # Seq_deepCpf1_C1 = Convolution1D(80, 5)(Seq_deepCpf1_Input_SEQ)
    outavg1d = self.avg1d(outact)  # size of window 2  (15,80)
    # Seq_deepCpf1_P1 = AveragePooling1D(2)(Seq_deepCpf1_C1)
    out_flatten = self.flatten(outavg1d)
    # Seq_deepCpf1_F = Flatten()(Seq_deepCpf1_P1)
    out_dropout = self.dropout(out_flatten)
    # Seq_deepCpf1_DO1 = Dropout(0.3)(Seq_deepCpf1_F)
    out_linear1200_80 = self.linear1200_80(out_dropout)
    out_act_linear1200_80 = self.relu(out_linear1200_80)
    # Seq_deepCpf1_D1 = Dense(80, activation='relu')(Seq_deepCpf1_DO1)
    out_dropout1200_80 = self.dropout(out_act_linear1200_80)
    # Seq_deepCpf1_DO2 = Dropout(0.3)(Seq_deepCpf1_D1)
    out_linear80_40 = self.linear80_40(out_dropout1200_80)
    out_act80_40 = self.relu(out_linear80_40)
    # Seq_deepCpf1_D2 = Dense(40, activation='relu')(Seq_deepCpf1_DO2)
    out_dropout80_40 = self.dropout(out_act80_40)
    # Seq_deepCpf1_DO3 = Dropout(0.3)(Seq_deepCpf1_D2)
    out_linear40_40 = self.linear40_40(out_dropout80_40)
    out_act40_40 = self.relu(out_linear40_40)
    # Seq_deepCpf1_D3 = Dense(40, activation='relu')(Seq_deepCpf1_DO3)
    out_dropout40_40 = self.dropout(out_act40_40)
    # Seq_deepCpf1_DO4 = Dropout(0.3)(Seq_deepCpf1_D3)
    out = self.linear40_1(out_dropout40_40)
    # Seq_deepCpf1_Output = Dense(1, activation='linear')(Seq_deepCpf1_DO4)
    return out


model = Regression().to(device)
loss = nn.MSELoss()  # 所以 loss 使用 MSELoss
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)  # optimizer 使用 Adam
num_epoch = 50

for epoch in range(num_epoch):
    train_loss = 0.0
    count = int(len(train_x)/batch_size)+1
    model.train()  
    
    for i, data in enumerate(train_loader):
        optimizer.zero_grad() 


        train_pred = model(data[0].to(device=device)) 
        batch_loss = loss(train_pred, data[1].to(device=device))  
        batch_loss.backward()  
        # print(str(i))
        optimizer.step() 

        # train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())#和groud thuth 比较看正确率
        train_loss += batch_loss.item()

    j = j + 1
    print("Epoch :", epoch  ,"train_loss:",train_loss/count)

keras output
Epoch 50/50

1000/14999 [=>…] - ETA: 0s - loss: 514.2870
3000/14999 [=====>…] - ETA: 0s - loss: 533.6077
5000/14999 [=========>…] - ETA: 0s - loss: 529.4184
7000/14999 [=============>…] - ETA: 0s - loss: 523.6750
9000/14999 [=================>…] - ETA: 0s - loss: 517.9706
11000/14999 [=====================>…] - ETA: 0s - loss: 516.3988
13000/14999 [=========================>…] - ETA: 0s - loss: 516.1699
14999/14999 [==============================] - 0s 28us/step - loss: 510.5814

pytorch output
Epoch : 42 train_loss: 1107.6384684244792
Epoch : 43 train_loss: 1124.8985188802083
Epoch : 44 train_loss: 1117.5798095703126
Epoch : 45 train_loss: 1103.8336100260417
Epoch : 46 train_loss: 1100.827498372396
Epoch : 47 train_loss: 1104.8447998046875
Epoch : 48 train_loss: 1101.6757080078125
Epoch : 49 train_loss: 1100.1193359375

code and data are available on GitHub


keras code is in DeepCpf1.py
pytorch code is in DeepCpf1_pytorch.py

thanks

I can’t find any obvious differences.
Sometimes an unwanted broadcasting takes place in the loss calculation, if you don’t pass an output and target in the same shape to nn.MSELoss, which should raise a warning but if often overlooked.
Could you double check it in your PyTorch code?

Excellent answer. thank you so much ptrblck. The loss is 500 now.

1 Like