Why do the unused parameters of a network affect the forward pass?

I am using only part of a pretrained net. If I keep the unused parameters, I get the following results:
Output of method 1:
Epoch: 0 Iteration: 0 Loss: tensor(18.7229, device='cuda:0')
However, if I get rid of the unused parameters, I get the following results:
Output of method 2:
Epoch: 0 Iteration: 0 Loss: tensor(30.2404, device='cuda:0')

Why would the unused parameters affect the outcome of the forward pass?

They shouldn’t. Are you sure you don’t use them? Are you fine-tuning the network / continuing training?
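
One quick way to check whether parameters are actually used is to run a backward pass and look at the gradients; parameters that never participate in the forward pass keep grad equal to None. A minimal sketch, assuming a model and an input batch x are already available and no earlier backward call has populated the gradients:

out = model(x).sum()
out.backward()
for name, p in model.named_parameters():
    # parameters that were never used in forward() still have p.grad is None
    print(name, p.grad is None)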

@justusschock I tried with both eval() and train() modes, and I am getting different results in both cases. For clarification, I have attached my code below (first the pretrained net, then my added layers):

The output of the pretrained net goes into my added layers.

*************************** Pretrained net part (keeping the unused layers):

import torch
import torch.nn as nn

class C3D(nn.Module):
    def __init__(self):
        super(C3D, self).__init__()

        self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv3a = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv3b = nn.Conv3d(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv4a = nn.Conv3d(256, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv4b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv5a = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv5b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool5 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=(0, 1, 1))

        self.fc6 = nn.Linear(8192, 4096)
        self.fc7 = nn.Linear(4096, 4096)
        self.fc8 = nn.Linear(4096, 487)

        self.dropout = nn.Dropout(p=0.5)

        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        h = self.relu(self.conv1(x))
        h = self.pool1(h)

        h = self.relu(self.conv2(h))
        h = self.pool2(h)

        h = self.relu(self.conv3a(h))
        h = self.relu(self.conv3b(h))
        h = self.pool3(h)

        h = self.relu(self.conv4a(h))
        h = self.relu(self.conv4b(h))
        h = self.pool4(h)

        h = self.relu(self.conv5a(h))
        h = self.relu(self.conv5b(h))
        h = self.pool5(h)

        h = h.view(-1, 8192)
        h = self.relu(self.fc6(h))
        # h = self.dropout(h)
        # h = self.relu(self.fc7(h))
        # h = self.dropout(h)

        # logits = self.fc8(h)
        # probs = self.softmax(logits)

        return h
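
For reference, the pretrained weights can be loaded straight into this full class, since every checkpoint key has a matching module here (a sketch, assuming the checkpoint is a plain state_dict of this architecture; 'c3d.pickle' is just a placeholder path):

c3d = C3D()
c3d.load_state_dict(torch.load('c3d.pickle'))  # placeholder checkpoint path
c3d = c3d.cuda()
c3d.eval()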

********************* Pretrained net part (not loading the unused parameters):

class C3D_altered(nn.Module):
    def __init__(self):
        super(C3D_altered, self).__init__()

        self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv3a = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv3b = nn.Conv3d(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv4a = nn.Conv3d(256, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv4b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv5a = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv5b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool5 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=(0, 1, 1))

        self.fc6 = nn.Linear(8192, 4096)
        # self.fc7 = nn.Linear(4096, 4096)
        # self.fc8 = nn.Linear(4096, 487)

        self.dropout = nn.Dropout(p=0.5)

        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        h = self.relu(self.conv1(x))
        h = self.pool1(h)

        h = self.relu(self.conv2(h))
        h = self.pool2(h)

        h = self.relu(self.conv3a(h))
        h = self.relu(self.conv3b(h))
        h = self.pool3(h)

        h = self.relu(self.conv4a(h))
        h = self.relu(self.conv4b(h))
        h = self.pool4(h)

        h = self.relu(self.conv5a(h))
        h = self.relu(self.conv5b(h))
        h = self.pool5(h)

        h = h.view(-1, 8192)
        h = self.relu(self.fc6(h))
        # h = self.dropout(h)
        # h = self.relu(self.fc7(h))
        # h = self.dropout(h)

        # logits = self.fc8(h)
        # probs = self.softmax(logits)

        return h
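
For the altered class, fc7 and fc8 no longer exist, so their keys in the checkpoint have to be skipped when loading. A sketch of the usual filtering approach (same placeholder path); this loading step is worth double-checking, because if the filter also drops keys that are still used (e.g. because of a 'module.' prefix mismatch), those layers keep their random initialization and the forward pass will give different results:

c3d_alt = C3D_altered()
checkpoint = torch.load('c3d.pickle')  # placeholder checkpoint path

model_dict = c3d_alt.state_dict()
# keep only checkpoint entries that still exist in the altered model
filtered = {k: v for k, v in checkpoint.items() if k in model_dict}
model_dict.update(filtered)
c3d_alt.load_state_dict(model_dict)

# sanity check: these keys were NOT overwritten by the checkpoint
print(set(model_dict) - set(filtered))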

My newly added layers on top of the pretrained net part:

class my_fc(nn.Module):
    def __init__(self):
        super(my_fc, self).__init__()
        self.fc_1 = nn.Linear(4096, 1)
        self.fc_2 = nn.Linear(4096, 3)
        self.fc_3 = nn.Linear(4096, 2)
        self.fc_4 = nn.Linear(4096, 4)
        self.fc_5 = nn.Linear(4096, 10)
        self.fc_6 = nn.Linear(4096, 8)

    def forward(self, x):
        op1 = self.fc_1(x)
        op2 = self.fc_2(x)
        op3 = self.fc_3(x)
        op4 = self.fc_4(x)
        op5 = self.fc_5(x)
        op6 = self.fc_6(x)

        return op1, op2, op3, op4, op5, op6
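
For completeness, this is roughly how the two parts are chained together; the input shape shown is the standard C3D clip size and only an assumption here, not my actual training code:

backbone = C3D_altered().cuda()
heads = my_fc().cuda()

clip = torch.randn(1, 3, 16, 112, 112, device='cuda')  # assumed (N, C, frames, H, W)
features = backbone(clip)                               # (N, 4096) after fc6 + ReLU
op1, op2, op3, op4, op5, op6 = heads(features)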

I tried with a minimal example, and I don’t see unused parameters affecting the forward pass. But I am still facing the problem in my main code, so there must be some mistake that I am not able to identify.
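
For reference, the minimal check I mean looks roughly like this (a sketch with toy layer sizes, not the exact code I ran):

import torch
import torch.nn as nn

torch.manual_seed(0)

class Small(nn.Module):
    def __init__(self, with_unused=False):
        super(Small, self).__init__()
        self.fc = nn.Linear(8, 4)
        if with_unused:
            self.unused = nn.Linear(4, 2)  # never called in forward()

    def forward(self, x):
        return self.fc(x)

a = Small(with_unused=False)
b = Small(with_unused=True)
b.fc.load_state_dict(a.fc.state_dict())  # copy the shared (used) weights

x = torch.randn(3, 8)
print(torch.allclose(a(x), b(x)))  # True: the unused layer changes nothing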