I am working on a Kaggle dataset. In one of the kernels, the author implemented a CNN in Keras that reaches 93% validation accuracy. I tried to reproduce the same architecture in PyTorch, but my PyTorch version only gets 70% accuracy. Is there something in the Keras model that I missed?
Here is the original Keras code:
# Keras reference model, declared as one layer list instead of repeated
# model.add(...) calls. Input is a 150x150 image with 3 channels (channels
# last); the output is a 6-way softmax.
model = Models.Sequential([
    # Stage 1: two 3x3 ReLU convolutions, then a 5x5 max-pool with stride 5.
    Layers.Conv2D(200, kernel_size=(3, 3), activation='relu', input_shape=(150, 150, 3)),
    Layers.Conv2D(180, kernel_size=(3, 3), activation='relu'),
    Layers.MaxPool2D(5, 5),
    # Stage 2: four 3x3 ReLU convolutions with shrinking channel counts,
    # then the second 5x5 max-pool.
    Layers.Conv2D(180, kernel_size=(3, 3), activation='relu'),
    Layers.Conv2D(140, kernel_size=(3, 3), activation='relu'),
    Layers.Conv2D(100, kernel_size=(3, 3), activation='relu'),
    Layers.Conv2D(50, kernel_size=(3, 3), activation='relu'),
    Layers.MaxPool2D(5, 5),
    # Classifier head: flatten, three ReLU dense layers, dropout, softmax.
    Layers.Flatten(),
    Layers.Dense(180, activation='relu'),
    Layers.Dense(100, activation='relu'),
    Layers.Dense(50, activation='relu'),
    Layers.Dropout(rate=0.5),
    Layers.Dense(6, activation='softmax'),
])

# Adam with learning rate 1e-4; labels are integer class ids (sparse loss).
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Optimizer.Adam(lr=0.0001),
    metrics=['accuracy'],
)

# Print the layer table, build an SVG of the graph (the result is not
# assigned to anything here), and write the graph with shapes to model.png.
model.summary()
SVG(model_to_dot(model).create(prog='dot', format='svg'))
Utils.plot_model(model, to_file='model.png', show_shapes=True)
Here is my PyTorch code:
import torch.nn as nn
import torch.nn.functional as F
# define the CNN architecture from this kaggle example https://www.kaggle.com/uzairrj/beg-tut-intel-image-classification-93-76-accur
class Net(nn.Module):
    """CNN classifier for 3-channel 150x150 images with 6 output classes.

    Layout: two 3x3 convolutions, a 5x5 max-pool, four more 3x3
    convolutions, a second 5x5 max-pool, then fully connected layers
    800 -> 180 -> 100 -> 50 -> 6 with dropout before the last one.

    ``forward`` returns raw class scores (logits); no softmax is applied
    inside the network.
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): every layer except fc4 is wrapped in its own
        # nn.DataParallel. PyTorch's documented usage is to wrap the whole
        # model once; the per-layer wrappers are kept here so parameter
        # names (e.g. "conv1.module.weight") and multi-GPU behaviour do not
        # change. Consider replacing them with a single outer wrapper.

        # Shapes below are for a 3 x 150 x 150 input (no padding, stride 1).
        # 3 x 150 x 150 -> 200 x 148 x 148
        self.conv1 = nn.DataParallel(nn.Conv2d(3, 200, 3))
        # 200 x 148 x 148 -> 180 x 146 x 146 (pooled to 180 x 29 x 29)
        self.conv2 = nn.DataParallel(nn.Conv2d(200, 180, 3))
        # 180 x 29 x 29 -> 180 x 27 x 27
        self.conv3 = nn.DataParallel(nn.Conv2d(180, 180, 3))
        # 180 x 27 x 27 -> 140 x 25 x 25
        self.conv4 = nn.DataParallel(nn.Conv2d(180, 140, 3))
        # 140 x 25 x 25 -> 100 x 23 x 23
        self.conv5 = nn.DataParallel(nn.Conv2d(140, 100, 3))
        # 100 x 23 x 23 -> 50 x 21 x 21 (pooled to 50 x 4 x 4)
        self.conv6 = nn.DataParallel(nn.Conv2d(100, 50, 3))

        # max pooling layer: 5x5 window, stride 5 (shared by both pool steps)
        self.pool = nn.MaxPool2d(5, 5)

        # linear layers (50 * 4 * 4 = 800 -> 180 -> 100 -> 50 -> 6)
        self.fc1 = nn.DataParallel(nn.Linear(800, 180))
        self.fc2 = nn.DataParallel(nn.Linear(180, 100))
        self.fc3 = nn.DataParallel(nn.Linear(100, 50))
        self.fc4 = nn.Linear(50, 6)

        # dropout layer (p=0.5), applied just before the output layer
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: float tensor of shape (N, 3, 150, 150).

        Returns:
            Tensor of shape (N, 6) with unnormalised class scores.

        Raises:
            RuntimeError: if the input size does not reduce to 800 features
                per sample (raised by the first linear layer).
        """
        # first convolutional stage followed by max pooling
        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))

        # second convolutional stage followed by max pooling
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = self.pool(F.relu(self.conv6(x)))

        # Flatten per sample. Keeping the batch dimension fixed (instead of
        # view(-1, 800)) means a wrongly sized input fails loudly in fc1
        # rather than being silently re-chunked into a different batch size.
        x = x.view(x.size(0), -1)

        # hidden fully connected layers with relu activation
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))

        # dropout, then the output layer (logits)
        x = self.dropout(x)
        x = self.fc4(x)
        return x
import torch
import torch.optim as optim

# create a complete CNN
model = Net()
print(model)

# Move the model to the GPU if CUDA is available; otherwise keep it on the
# CPU instead of failing on a hard-coded 'cuda' device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# specify loss function (categorical cross-entropy on raw logits)
criterion = nn.CrossEntropyLoss()

# specify optimizer (Adam, learning rate 1e-4)
optimizer = optim.Adam(model.parameters(), lr=1e-4)