Hi,
I am trying to practice PyTorch with a small CNN example on MNIST. However, I get weird performance on the test dataset: it first goes up, then goes down, and finally begins to converge.
As shown in this picture,
I use SGD with learning rate = 0.01.
The architecture is defined as follows:
class MyDeepNeural(nn.Module):
    """Small CNN: three conv/ReLU/max-pool/dropout stages plus a two-layer head.

    The constructor builds two ``nn.Sequential`` containers:

    * ``self.conv`` -- three stages, each ``Conv2d(kernel_size=3, padding=1)``
      -> ``ReLU`` -> ``MaxPool2d(kernel_size=2)`` -> ``Dropout``, with channel
      widths 1 -> 32 -> 64 -> 128.
    * ``self.fc`` -- ``Linear(128 * 9, 625)`` -> ``ReLU`` -> ``Linear(625, 10)``
      -> ``Softmax(dim=1)``.

    Args:
        p_keep_conv: Probability of *keeping* an activation in the dropout
            layers of ``self.conv``; each ``nn.Dropout`` is built with
            ``1 - p_keep_conv`` as its drop probability.
    """

    def __init__(self, p_keep_conv):
        super(MyDeepNeural, self).__init__()

        # nn.Dropout takes the probability of zeroing an element, so convert
        # the "keep" probability once and reuse it for all three stages.
        p_drop = 1 - p_keep_conv

        # Feature extractor. The 3x3 convolutions with padding=1 preserve the
        # spatial size; each 2x2 max-pool halves it (rounding down).
        self.conv = nn.Sequential()
        self.conv.add_module('conv1', nn.Conv2d(1, 32, kernel_size=3, padding=1))
        self.conv.add_module('relu1', nn.ReLU())
        self.conv.add_module('pool1', nn.MaxPool2d(kernel_size=2))
        self.conv.add_module('drop1', nn.Dropout(p_drop))

        self.conv.add_module('conv2', nn.Conv2d(32, 64, kernel_size=3, padding=1))
        self.conv.add_module('relu2', nn.ReLU())
        self.conv.add_module('pool2', nn.MaxPool2d(kernel_size=2))
        self.conv.add_module('drop2', nn.Dropout(p_drop))

        self.conv.add_module('conv3', nn.Conv2d(64, 128, kernel_size=3, padding=1))
        self.conv.add_module('relu3', nn.ReLU())
        self.conv.add_module('pool3', nn.MaxPool2d(kernel_size=2))
        self.conv.add_module('drop3', nn.Dropout(p_drop))

        # Classifier head. 128 * 9 is 128 channels times a 3x3 feature map,
        # which is what three 2x2 pools leave from a 28x28 input
        # (28 -> 14 -> 7 -> 3) -- assumes MNIST-sized images; TODO confirm.
        self.fc = nn.Sequential()
        self.fc.add_module('fc1', nn.Linear(128 * 9, 625))
        self.fc.add_module('relu4', nn.ReLU())
        self.fc.add_module('fc2', nn.Linear(625, 10))
        # dim=1 normalises over the 10 outputs of each sample. Passing it
        # explicitly avoids the deprecated implicit-dimension behaviour (and
        # its warning) while selecting the same axis for a (batch, 10) input.
        # NOTE(review): if the training loop uses nn.CrossEntropyLoss, that
        # loss already applies log-softmax to its input, so this layer would
        # make softmax run twice -- confirm against the training code.
        self.fc.add_module('softmax', nn.Softmax(dim=1))