How would I add a regressor "head" to the classifier?

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

I’m a newbie to PyTorch, so I need some advice on the following.
Let’s say I have the code above, which is a pretty simple classifier. I need to add a regressor head on top of the last convolutional layer with an L2-norm loss. How would this change the forward function? Would I then have two outputs, one from the classifier and one from the regressor? How would I add two branches on top of the last convolutional layer (instead of just one), now that both the regressor head and the classifier head branch off from it? Also, how would I call backward on the loss?

Thanks!

This is basically very simple:

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        # regression head: a second FC stack branching off the shared conv features
        self.fc4 = nn.Linear(16 * 5 * 5, 120)
        self.fc5 = nn.Linear(120, 84)
        self.fc6 = nn.Linear(84, 1)  # single regression output

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)

        # classification head
        x_clf = F.relu(self.fc1(x))
        x_clf = F.relu(self.fc2(x_clf))
        x_clf = self.fc3(x_clf)

        # regression head
        x_reg = F.relu(self.fc4(x))
        x_reg = F.relu(self.fc5(x_reg))
        x_reg = self.fc6(x_reg)

        return x_clf, x_reg



net = Net()

criterion_clf = nn.CrossEntropyLoss()
criterion_reg = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; the dataset is assumed to yield both a class label
        # and a regression target for each sample
        inputs, labels_clf, labels_reg = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs_clf, outputs_reg = net(inputs)
        # total loss is the sum of both objectives; labels_reg should be a float
        # tensor with the same shape as outputs_reg (e.g. (batch_size, 1))
        loss = criterion_clf(outputs_clf, labels_clf) + criterion_reg(outputs_reg, labels_reg)
        loss.backward()
        optimizer.step()

Alternatively, you could zero the gradients, call backward on each loss separately, and then call the optimizer step afterwards, since gradients are accumulated by default.
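If you go that way, note that the first backward call needs retain_graph=True, because both heads share the convolutional trunk and the graph would otherwise be freed after the first backward. A minimal sketch of that variant of the inner loop (same names as above):

optimizer.zero_grad()
outputs_clf, outputs_reg = net(inputs)
loss_clf = criterion_clf(outputs_clf, labels_clf)
loss_reg = criterion_reg(outputs_reg, labels_reg)
loss_clf.backward(retain_graph=True)  # keep the graph alive for the shared conv trunk
loss_reg.backward()                   # gradients from both losses accumulate in .grad
optimizer.step()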
