I'm trying to convert CNN model code from Keras (with a TensorFlow backend) to PyTorch.
The problem is that I can't seem to find the equivalent of Keras' 'categorical_crossentropy' loss:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
The closest I can find is this:
self._criterion = nn.CrossEntropyLoss()
self._optimizer = optim.Adam(self._model.parameters(), eps=1e-07)
…
loss = self._criterion(outputs, primary_indexes)
loss.backward()
But it doesn't behave like the original Keras code and doesn't train as well. On the original dataset it takes twice as many epochs to finish and still ends up worse, and on my larger datasets the loss and accuracy go from around 15-20% at the first epoch down to around 4% when training ends.
The Keras version, by contrast, goes from around 15-20% up to around 40-55% when training ends.
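For reference, this is how I understand the two losses differ in what they expect (the tensor names below are just placeholders I made up):

import torch
import torch.nn as nn

# PyTorch: nn.CrossEntropyLoss applies log-softmax internally, so it expects
# raw logits of shape (batch, num_classes) and integer class indices as targets.
criterion = nn.CrossEntropyLoss()
logits = torch.randn(8, 11)               # un-normalized scores for 11 classes
targets = torch.randint(0, 11, (8,))      # class indices, not one-hot vectors
loss = criterion(logits, targets)

# Keras: categorical_crossentropy (with the default from_logits=False) is computed
# on the probabilities the model already outputs through its softmax layer,
# against one-hot encoded targets.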
Here’s the original Keras model:
model = models.Sequential()
model.add(Reshape(in_shp+[1], input_shape=in_shp))
model.add(ZeroPadding2D((0,2)))
model.add(Conv2D(64, (1,4), activation="relu"))
model.add(Dropout(dr))
model.add(ZeroPadding2D((0,2)))
model.add(Conv2D(64, (2,4), activation="relu"))
model.add(Dropout(dr))
model.add(Conv2D(128, (1,8), activation=“relu”))
model.add(Dropout(dr))
model.add(Conv2D(128, (1,8), activation=“relu”))
model.add(Dropout(dr))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(dr))
model.add(Dense(len(classes), activation='softmax'))
model.add(Reshape([len(classes)]))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
Layer (type)                   Output Shape          Param #
reshape_1 (Reshape)            (None, 2, 128, 1)     0
zero_padding2d_1 (ZeroPadding) (None, 2, 132, 1)     0
conv2d_1 (Conv2D)              (None, 2, 129, 64)    320
dropout_1 (Dropout)            (None, 2, 129, 64)    0
zero_padding2d_2 (ZeroPadding) (None, 2, 133, 64)    0
conv2d_2 (Conv2D)              (None, 1, 130, 64)    32832
dropout_2 (Dropout)            (None, 1, 130, 64)    0
conv2d_3 (Conv2D)              (None, 1, 123, 128)   65664
dropout_3 (Dropout)            (None, 1, 123, 128)   0
conv2d_4 (Conv2D)              (None, 1, 116, 128)   131200
dropout_4 (Dropout)            (None, 1, 116, 128)   0
flatten_1 (Flatten)            (None, 14848)         0
dense1 (Dense)                 (None, 256)           3801344
dropout_5 (Dropout)            (None, 256)           0
dense2 (Dense)                 (None, 11)            2827
reshape_2 (Reshape)            (None, 11)            0
My PyTorch model code looks like this:
import torch
import torch.nn as nn
import torch.nn.functional as F

class My_Model(nn.Module):
    def __init__(self, classes, dr=0.5, bias=False):
        super(My_Model, self).__init__()
        # Equivalent of Keras ZeroPadding2D((0, 2)): pad 2 columns on each side, no rows
        self.pad = nn.ConstantPad2d((2, 2, 0, 0), 0)
        self.dropout = nn.Dropout(dr)
        self.conv1 = nn.Conv2d(  1,  64, (1, 4), bias=bias)
        self.conv2 = nn.Conv2d( 64,  64, (2, 4), bias=bias)
        self.conv3 = nn.Conv2d( 64, 128, (1, 8), bias=bias)
        self.conv4 = nn.Conv2d(128, 128, (1, 8), bias=bias)
        self.linear1_input_size = 14848  # flattened size, matches flatten_1 (None, 14848) in the Keras summary
        self.linear1 = nn.Linear(self.linear1_input_size, 256, bias=bias)
        self.linear2 = nn.Linear(256, classes, bias=bias)

    def forward(self, x):
        x = self.pad(x)
        x = F.relu(self.conv1(x))
        x = self.dropout(x)
        x = self.pad(x)
        x = F.relu(self.conv2(x))
        x = self.dropout(x)
        x = F.relu(self.conv3(x))
        x = self.dropout(x)
        x = F.relu(self.conv4(x))
        x = self.dropout(x)
        x = x.view(-1, 14848)
        x = F.relu(self.linear1(x))
        x = self.dropout(x)
        x = F.softmax(self.linear2(x), 1)
        return x
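For reference, a quick shape check on a dummy batch, assuming in_shp = [2, 128] (so the PyTorch input is (batch, 1, 2, 128)) and 11 classes, to confirm the layers line up with the Keras summary:

model = My_Model(classes=11)
dummy = torch.randn(4, 1, 2, 128)   # (batch, channels, height, width)
out = model(dummy)
print(out.shape)                    # torch.Size([4, 11]), matching reshape_2 (None, 11)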