Hello there!
I’m trying to implement this Architecture from in pyTorch via the following code:
class Net(nn.Module):
def __init__(self): super(Net, self).__init__() self.dropout1 = nn.Dropout2d(p=0.2) self.conv1 = nn.Conv2d(1, 96, kernel_size=11, stride=4) init.kaiming_normal(self.conv1.weight.data) self.batch1 = nn.BatchNorm2d(96) self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2) self.conv2 = nn.Conv2d(96, 256, kernel_size=5, padding=2) init.kaiming_normal(self.conv2.weight.data) self.batch2 = nn.BatchNorm2d(256) self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2) self.conv3 = nn.Conv2d(256, 384, kernel_size=3, padding=1) init.kaiming_normal(self.conv3.weight.data) self.batch3 = nn.BatchNorm2d(384) self.conv4 = nn.Conv2d(384, 384, kernel_size=3, padding=1) init.kaiming_normal(self.conv4.weight.data) self.batch4 = nn.BatchNorm2d(384) self.conv5 = nn.Conv2d(384, 256, kernel_size=3, padding=1) init.kaiming_normal(self.conv5.weight.data) self.pool5 = nn.MaxPool2d(kernel_size=3, stride=2) self.batch5 = nn.BatchNorm2d(256) self.fc1 = nn.Linear(9216, 2048) # 2048) init.kaiming_normal(self.fc1.weight.data) self.batchfc1 = nn.BatchNorm1d(2048) self.drop_fc1 = nn.Dropout() self.fc2 = nn.Linear(2048, 2048) init.kaiming_normal(self.fc2.weight.data) self.batchfc2 = nn.BatchNorm1d(2048) self.drop_fc2 = nn.Dropout() self.l_1 = nn.Linear(2048,115) init.kaiming_normal(self.l_1.weight.data) self.drop_l1 = nn.Dropout() self.l_2 = nn.Linear(9216,2) init.kaiming_normal(self.l_2.weight.data) self.softmax = nn.LogSoftmax() self.sigmoid = nn.Sigmoid() def forward(self, x): x = self.conv1(x) x = self.batch1(x) x = self.pool1(x) x = self.conv2(x) x = self.batch2(x) x = self.pool2(x) x = self.batch3(self.conv3(x)) x = self.batch4(self.conv4(x)) x = self.conv5(x) x = self.pool5(x) x = self.batch5(x) x = x.view(-1, 9216) f = x x = self.batchfc1(self.fc1(x)) x = self.batchfc2(self.fc2(x)) f = self.l_2(f) x = self.l_1(x) x = self.softmax(x) f = self.softmax(f) return
I have tested their implementation for my dataset, and for a set of 250*250 pictures my code generates the same array sizes after each layer compared to the Original (which is in Theano). I cannot track down what is wrong with my code, I’ve read almost all documentations there is about the nn.modules and Theano layers which are used here.
Also, I’ve used both Adams optimizer and SGD and there is not much difference in the output.
My results are about 90% on Theano, but are about 78% 80 in the torch model.
P.S: here is my complete code in Github.