Let’s say I have a module:
import torch
import torch.nn as nn

class mod(nn.Module):
    def __init__(self, in_features, out_features):
        super(mod, self).__init__()
        self.scalar = nn.Parameter(torch.tensor([1.1]))  # learnable (requires_grad=True by default)
        # nn.Linear does not take a requires_grad argument, so freeze the weight after construction
        self.linear1 = nn.Linear(in_features, 10, bias=False)
        self.linear1.weight.requires_grad_(False)  # frozen
        self.linear2 = nn.Linear(10, out_features, bias=False)  # learnable

    def forward(self, input):
        x = input * self.scalar
        x = self.linear1(x)
        x = torch.relu(x)
        x = self.linear2(x)
        x = torch.relu(x)
        return x
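For reference, this is the kind of check I have in mind for the frozen setup; a minimal sketch where the feature sizes and the dummy batch are just placeholders:

model = mod(in_features=8, out_features=3)
out = model(torch.randn(4, 8))   # dummy batch of 4 samples
out.sum().backward()             # scalar loss just for the check
for name, p in model.named_parameters():
    # the frozen weight should report requires_grad=False and grad=None
    print(name, p.requires_grad, p.grad is None)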
Would autograd still be able to compute gradients for (and update) any learnable parameter in this forward pass, even though there are frozen, non-learnable layers in between? And what if I use a learnable non-linearity, like a threshold, between the static layers?
class threshold(nn.Module):
    def __init__(self):
        super(threshold, self).__init__()
        self.t = nn.Parameter(torch.tensor([0.2]))  # learnable threshold
    def forward(self, input):
        # zero out values at or below the threshold
        return torch.where(input > self.t, input, torch.zeros_like(input))
class mod(nn.Module):
    def __init__(self, in_features, out_features):
        super(mod, self).__init__()
        self.scalar = nn.Parameter(torch.tensor([1.1]))  # learnable
        # both linear layers are frozen here
        self.linear1 = nn.Linear(in_features, 10, bias=False)
        self.linear1.weight.requires_grad_(False)
        self.linear2 = nn.Linear(10, out_features, bias=False)
        self.linear2.weight.requires_grad_(False)
        self.threshold1 = threshold()
        self.threshold2 = threshold()

    def forward(self, input):
        x = input * self.scalar
        x = self.linear1(x)
        x = self.threshold1(x)
        x = self.linear2(x)
        x = self.threshold2(x)
        return x
Can gradients still propagate through here? This is for a hypothetical scenario in which I need to fine-tune an arbitrary set of learnable module parameters without affecting the network's main linear layer weights.
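If it helps, the optimizer setup I'm imagining simply filters by requires_grad, so the frozen linear weights are never handed to the optimizer at all; a rough sketch with arbitrary sizes and learning rate:

model = mod(in_features=8, out_features=3)   # the second version, with thresholds
# keep only the parameters that are still learnable (scalar, threshold1.t, threshold2.t)
trainable = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable, lr=1e-2)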