Hi everyone,
I created the following architecture, which has one main branch, but in the decision layer it splits into two branches where the ResNet FC output is shared:
class Net1(nn.Module):
    """Bias-free linear head: out = input @ weight.T.

    Args:
        in_features: dimensionality of the input embedding.
        out_features: number of output classes/logits.

    The ``label`` argument is accepted but unused; it is kept for
    interface compatibility with margin-based heads (e.g. ArcFace-style)
    that need the labels at training time.
    """

    def __init__(self, in_features, out_features):
        super(Net1, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        # BUG FIX: torch.Tensor(...) allocates UNINITIALIZED memory, so the
        # weights started as arbitrary garbage. They are still registered as
        # a Parameter (so they DO get trained), but the starting point was
        # undefined. Initialize explicitly.
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label=None):
        # (batch, in_features) @ (in_features, out_features)
        #   -> (batch, out_features)
        out = torch.mm(input, self.weight.t())
        return out
# BUG FIX: the class is named Net1 — `net1(...)` raises NameError.
classifier = Net1(1024, 624)
class MyRes(nn.Module):
    """ResNet-50 backbone with a shared embedding feeding two decision branches.

    The stock ResNet ``fc`` is replaced so the backbone emits an
    ``in_features``-dimensional embedding. Two heads consume that same
    embedding:
      * branch 1: ``self.res_model.classifier`` (plain Linear) — first
        element of the returned tuple;
      * branch 2: ``self.classifier`` (a Net1 head) — second element.
    Because both heads read the same tensor, any gradients flowing back
    from both heads are summed at that connection point by autograd.

    Args:
        in_features: embedding size produced by the replaced fc block.
        Num_classes: number of output classes for both heads.
    """

    def __init__(self, in_features=1024, Num_classes=624):
        super(MyRes, self).__init__()
        self.in_features = in_features
        self.res_model = torchvision.models.resnet50(pretrained=True)
        # Replace the stock fc with an embedding projector.
        self.res_model.fc = nn.Sequential(
            nn.BatchNorm1d(2048),
            nn.Dropout(p=0.4),
            nn.Linear(2048, in_features),
            nn.BatchNorm1d(in_features),
        )
        # BUG FIX: the original captured a module-level global `classifier`
        # that was hard-coded to (1024, 624), silently ignoring the
        # constructor arguments. Build the head from the actual arguments.
        self.classifier = Net1(in_features, Num_classes)
        # Second decision branch. Stored on res_model as in the original
        # (keeps state_dict keys compatible); ResNet's own forward never
        # calls it — it only runs when invoked explicitly in forward below.
        self.res_model.classifier = nn.Linear(in_features, Num_classes)

    def forward(self, x, labels=None):
        x = self.res_model(x)             # shared embedding: (batch, in_features)
        l = self.classifier(x, labels)    # Net1 branch
        x = self.res_model.classifier(x)  # Linear branch
        return x, l
During training, I calculate the loss for just the first branch and then back-propagate it:
model = MyRes()
optimizer = optim.Adam(model.parameters(), lr=lr)
model.train()

optimizer.zero_grad()
# BUG FIX: MyRes.forward takes (x, labels) — the original called
# model(images) with one argument, and the loss line then referenced an
# undefined name `preds` (the variable was spelled `pred`).
preds, preds2 = model(images, labels)
# Loss on the first returned branch only (res_model.classifier output).
# Only parameters on the path to `preds` receive gradients from this
# loss; the Net1 head (which produced `preds2`) gets none here.
loss = criterion(preds, labels)
loss.backward()
optimizer.step()
I am wondering what happens in the main branch, especially at the connection point of the two branches.
Will there be two gradients (one from self.classifier and one from self.res_model.classifier)? During back-propagation, are they summed?
What happens to the weights (self.weight) in Net1 — will they be learned during network training, or will they stay fixed as initialized?
Best Regards