If I implement a dynamic linear layer as shown below, its output shape is not affected by `update_size`. See the code below for details.
>>> import torch as th
>>> import torch.nn as nn
>>> import torch.nn.functional as F
>>> class DyLinear(nn.Module):
...     def __init__(self, in_fea, out_fea, device='cuda'):
...         super(DyLinear, self).__init__()
...         self.device = device
...         self.in_fea = in_fea
...         self.out_fea = out_fea
...         self.weight = nn.Parameter(th.randn(out_fea, in_fea)).to(self.device)
...     def forward(self, x):
...         x = F.linear(x, self.weight)
...         return x
...     def update_size(self, size):  # slow; doesn't matter much since it's performed infrequently
...         if size <= self.out_fea:
...             raise ValueError(f"New size ({size}) needs to be bigger than current output size ({self.out_fea})")
...         else:
...             with th.no_grad():
...                 self.out_fea = size
...                 self.weight = nn.Parameter(th.cat((self.weight, th.randn(size - self.out_fea, self.in_fea).to(self.device)), 0)).to(self.device)
...
>>> a = th.rand(5)
>>> model = DyLinear(5, 5)
>>> model(a)
tensor([ 8.9683e-42, 0.0000e+00, -3.2745e+27, 8.4919e-43, 0.0000e+00], grad_fn=<SqueezeBackward3>)
>>> model.update_size(6)
>>> model(a)
tensor([2.1514e-34, 2.6429e-38, 1.4125e+14, 4.2039e-45, 1.4466e-36], grad_fn=<SqueezeBackward3>)
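As far as I can tell, in the version above `self.out_fea = size` runs before `size - self.out_fea` is evaluated, so `th.randn(size - self.out_fea, self.in_fea)` builds a tensor with zero rows and the concatenation is a no-op, leaving the weight at its old shape. Below is a minimal sketch of a version whose output shape does grow, assuming the intent is to append freshly initialized rows. This is my own rewrite, not the code above: the `DyLinear2` name and the `device='cpu'` default are mine (chosen so it runs anywhere), the row delta is computed before `self.out_fea` is overwritten, and the parameter is created directly on the target device via the `device=` argument (calling `.to()` on a freshly built `nn.Parameter` can hand back a plain, non-leaf tensor instead of a registered parameter).

>>> class DyLinear2(nn.Module):
...     def __init__(self, in_fea, out_fea, device='cpu'):
...         super().__init__()
...         self.device = device
...         self.in_fea = in_fea
...         self.out_fea = out_fea
...         # Build the parameter on the target device so it stays a registered leaf
...         self.weight = nn.Parameter(th.randn(out_fea, in_fea, device=device))
...     def forward(self, x):
...         return F.linear(x, self.weight)
...     def update_size(self, size):
...         if size <= self.out_fea:
...             raise ValueError(f"New size ({size}) needs to be bigger than current output size ({self.out_fea})")
...         with th.no_grad():
...             # Compute the number of new rows BEFORE overwriting self.out_fea
...             new_rows = size - self.out_fea
...             self.out_fea = size
...             self.weight = nn.Parameter(
...                 th.cat((self.weight, th.randn(new_rows, self.in_fea, device=self.device)), 0)
...             )
...
>>> m = DyLinear2(5, 5)
>>> m(th.rand(5)).shape
torch.Size([5])
>>> m.update_size(6)
>>> m(th.rand(5)).shape
torch.Size([6])

With this ordering, `new_rows` is nonzero when the new rows are created, so the concatenated weight actually grows from (5, 5) to (6, 5) and the output gains a dimension accordingly.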