Hi all,
I want to create a fully connected network that also takes inputs at intermediate layers; these inputs are optional, and where they are missing I fill them with zeros.
My class looks like this now:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FCC(nn.Module):
    def __init__(self, hidden_size):
        super(FCC, self).__init__()
        self.drp = nn.Dropout(0.1)
        self.fc1 = nn.Linear(in_features=hidden_size, out_features=60)
        self.fc2 = nn.Linear(in_features=60, out_features=50)
        self.fc3 = nn.Linear(in_features=52, out_features=30)  # 50 + 2 extra vis features
        self.fc4 = nn.Linear(in_features=34, out_features=10)  # 30 + 4 extra cl features
        self.fc5 = nn.Linear(in_features=10, out_features=2)
        # initialise all weights from N(0, 0.1)
        self.fc1.weight.data.normal_(mean=0.0, std=0.1)
        self.fc2.weight.data.normal_(mean=0.0, std=0.1)
        self.fc3.weight.data.normal_(mean=0.0, std=0.1)
        self.fc4.weight.data.normal_(mean=0.0, std=0.1)
        self.fc5.weight.data.normal_(mean=0.0, std=0.1)

    def forward(self, x, cl_input, vis_input):
        out = self.fc1(x)
        out = F.relu(out, inplace=True)
        out = self.fc2(out)
        out = F.relu(out, inplace=True)
        merge0 = torch.cat((out, vis_input), 1)  # concatenate vis_input along the feature dim
        out = self.fc3(merge0)
        out = F.relu(out, inplace=True)
        merge = torch.cat((out, cl_input), 1)    # concatenate cl_input along the feature dim
        out = self.fc4(merge)
        out = F.relu(out, inplace=True)
        out = self.fc5(out)
        return out
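For reference, this is roughly how I call the model when the optional inputs are missing, just a sketch with example values: hidden_size=100 and a batch of 32 are arbitrary, and the 2/4 feature sizes are what I infer from the concatenations above (fc3 expects 50 + 2 inputs, fc4 expects 30 + 4 inputs).

    # Sketch of a forward call with zero-filled optional inputs (example shapes only)
    model = FCC(hidden_size=100)          # hidden_size is just an example value

    x = torch.randn(32, 100)              # batch of 32 main inputs

    # Optional intermediate inputs: real data when available,
    # otherwise zeros of the expected shape.
    vis_input = torch.zeros(32, 2)        # placeholder when vis features are missing
    cl_input = torch.zeros(32, 4)         # placeholder when cl features are missing

    out = model(x, cl_input, vis_input)   # out has shape (32, 2)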
I noticed that, compared with the model without the intermediate inputs, training performance is much worse and the loss curve is far more discontinuous.
Am I doing something wrong with the gradients?
Thanks