I’m trying to move from Keras to PyTorch. However, with the same model and the same optimizer in both frameworks, I always get much worse results with PyTorch. Any idea why I get these strange results? Thanks a lot for your help.

The Keras model I’m using is:

```
def cnn_best(input_shape, classes):
    """Build a VGG16-inspired 1-D CNN graph and return its end points.

    Five Conv1D(kernel 11, ReLU, 'same') + AveragePooling1D(2) blocks are
    followed by two 4096-unit ReLU dense layers and a softmax classifier.

    Args:
        input_shape: overwritten inside the function; the graph is always
            built for inputs of shape (700, 1).
        classes: number of units in the final softmax layer.

    Returns:
        Tuple ``(inputs, out)``: the input tensor and the softmax output
        tensor. Building and compiling the Model is left to the caller.
    """
    input_shape = (700, 1)
    img_input = Input(shape=input_shape)

    # Convolutional blocks 1-5: filter counts follow the VGG16 progression.
    x = img_input
    for block_idx, n_filters in enumerate((64, 128, 256, 512, 512), start=1):
        x = Conv1D(n_filters, 11, activation='relu', padding='same',
                   name='block%d_conv1' % block_idx)(x)
        x = AveragePooling1D(2, strides=2,
                             name='block%d_pool' % block_idx)(x)

    # Classification block
    x = Flatten(name='flatten')(x)
    for fc_name in ('fc1', 'fc2'):
        x = Dense(4096, activation='relu', name=fc_name)(x)
    out = Dense(classes, activation='softmax', name='predictions')(x)

    print(' -- model was built.')
    return img_input, out
# RMSprop with a learning rate of 1e-5; every other hyper-parameter is left
# at the Keras default. (Alternatives tried: 'adam', 'adadelta'.)
optimizer = keras.optimizers.RMSprop(lr=1e-5)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
```

The PyTorch model is:

```
class ascadCNNbest(nn.Module):
    """1-D VGG-style CNN: five conv/ReLU/avg-pool blocks plus three linear layers.

    Each block is ``Conv1d(kernel_size=11, padding=5) -> ReLU ->
    avg_pool1d(2)``; the flattened features then pass through two 4096-unit
    ReLU layers and a final linear layer. ``forward`` returns raw logits
    (no softmax), which is the input ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self, num_classes, trace_len=700):
        """ Constructor
        Args:
            num_classes: number of classes
            trace_len: number of samples in each input trace. Defaults to
                700, which yields the 10752 flattened features the first
                linear layer was originally hard-coded for.
        Raises:
            ValueError: if ``trace_len`` is too short to survive the five
                halving pooling stages.
        """
        super(ascadCNNbest, self).__init__()

        # The convolutions preserve length (kernel 11, padding 5); each of
        # the five avg_pool1d(2) stages halves it, rounding down.
        pooled_len = trace_len
        for _ in range(5):
            pooled_len //= 2
        if pooled_len < 1:
            raise ValueError(
                "trace_len=%r is too short for five pooling stages" % (trace_len,)
            )

        self.num_classes = num_classes
        self.traceLen = trace_len
        self.conv1 = nn.Conv1d(1, 64, kernel_size=11, stride=1, padding=5)
        self.conv2 = nn.Conv1d(64, 128, kernel_size=11, stride=1, padding=5)
        self.conv3 = nn.Conv1d(128, 256, kernel_size=11, stride=1, padding=5)
        self.conv4 = nn.Conv1d(256, 512, kernel_size=11, stride=1, padding=5)
        self.conv5 = nn.Conv1d(512, 512, kernel_size=11, stride=1, padding=5)
        self.fc1 = nn.Linear(512 * pooled_len, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)

        # Use Glorot-uniform weights and zero biases (the Keras layer
        # defaults) instead of PyTorch's default initialisation, so a
        # side-by-side comparison with a Keras model starts from the same
        # weight distribution.
        for module in self.modules():
            if isinstance(module, (nn.Conv1d, nn.Linear)):
                nn.init.xavier_uniform_(module.weight)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        """Return class logits for a batch shaped (batch, 1, trace_len)."""
        out = F.relu(self.conv1(x))
        out = F.avg_pool1d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.avg_pool1d(out, 2)
        out = F.relu(self.conv3(out))
        out = F.avg_pool1d(out, 2)
        out = F.relu(self.conv4(out))
        out = F.avg_pool1d(out, 2)
        out = F.relu(self.conv5(out))
        out = F.avg_pool1d(out, 2)
        # Flatten (batch, channels, length) into (batch, channels * length).
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        # No softmax here: the loss function applies log-softmax itself.
        out = self.fc3(out)
        return out
# nn.CrossEntropyLoss applies log-softmax internally, so the network must
# output raw logits. Its targets are class indices, unlike the one-hot
# targets used with Keras' 'categorical_crossentropy'.
criterion = nn.CrossEntropyLoss()
# PyTorch's RMSprop defaults (alpha=0.99, eps=1e-8) differ from Keras'
# (rho=0.9, epsilon=1e-7). Pass the Keras values explicitly so both
# frameworks run the same optimizer configuration.
optimizer = optim.RMSprop(net.parameters(), lr=1e-5, alpha=0.9, eps=1e-7)
```

For both models I’m using the same batch size and the same optimizer (RMSprop with a learning rate of 1e-5). Because the PyTorch results were so much worse, I also tried different learning rates for PyTorch, but the results were similarly poor. Thanks again for any help.