Yes, creating separate tensors (a trainable `nn.Parameter` and a frozen buffer) should work. Here is a small example showing this use case:
```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class MyLinear(nn.Module):
    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        # trainable part of the weight (out_features - 2 rows)
        self.weight = nn.Parameter(torch.randn(out_features - 2, in_features))
        # frozen part: a buffer is saved in the state_dict but gets no gradient
        self.register_buffer("frozen_weight", torch.randn(2, in_features))
        # do the same with the bias if needed
        if bias:
            self.bias = nn.Parameter(torch.randn(out_features))
        else:
            self.register_parameter("bias", None)

    def forward(self, x):
        # assemble the full weight matrix from the trainable and frozen parts
        weight = torch.cat((self.weight, self.frozen_weight), dim=0)
        out = F.linear(x, weight, self.bias)
        return out


my_linear = MyLinear(10, 5)
x = torch.randn(8, 10)
my_out = my_linear(x)

my_out.mean().backward()
print(my_linear.weight.grad)         # valid gradient for the trainable part
print(my_linear.frozen_weight.grad)  # None, since buffers are not tracked by autograd
print(my_linear.state_dict())        # contains weight, bias, and frozen_weight
```
I’ve used a simple row-wise split here, but you can of course use a more complicated masking scheme instead, as sketched below. Also note that I haven’t changed the bias, so you might want to apply the same approach to it as well.
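If you want to freeze arbitrary entries rather than whole rows, a mask-based variant could look like this. This is just a minimal sketch of the idea; `MaskedLinear` and `frozen_mask` are made-up names for illustration, not part of any PyTorch API:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class MaskedLinear(nn.Module):
    # sketch: freeze arbitrary weight entries selected by a boolean mask
    def __init__(self, in_features, out_features, frozen_mask, bias=True):
        super().__init__()
        # trainable values (only the unfrozen positions are actually used)
        self.weight = nn.Parameter(torch.randn(out_features, in_features))
        # frozen values and the mask are buffers: saved in the state_dict,
        # but never updated by autograd
        self.register_buffer("frozen_weight", torch.randn(out_features, in_features))
        self.register_buffer("frozen_mask", frozen_mask)  # True = frozen entry
        if bias:
            self.bias = nn.Parameter(torch.randn(out_features))
        else:
            self.register_parameter("bias", None)

    def forward(self, x):
        # take frozen values where the mask is True, trainable ones elsewhere;
        # gradients only flow into self.weight at the unfrozen positions
        weight = torch.where(self.frozen_mask, self.frozen_weight, self.weight)
        return F.linear(x, weight, self.bias)


mask = torch.rand(5, 10) > 0.5  # freeze roughly half of the entries
masked_linear = MaskedLinear(10, 5, mask)
out = masked_linear(torch.randn(8, 10))
out.mean().backward()
print(masked_linear.weight.grad[mask])  # all zeros at the frozen positions
```

Since the forward pass always reads the frozen positions from the buffer, it doesn’t matter if an optimizer with weight decay or momentum slightly changes the corresponding (unused) entries of `self.weight`. The same `torch.where` trick would also work for the bias.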