I’m unsure if I understand your use case correctly, but if you want to directly manipulate a trainable parameter (i.e. without calculating gradients and letting an optimizer apply the update), you could use a no_grad() context as shown here:
import torch
import torch.nn as nn

model = nn.Linear(10, 10)
optimizer = torch.optim.Adam(model.parameters(), lr=1.)

# regular training step: forward, backward, optimizer update
out = model(torch.randn(1, 10))
out.mean().backward()
print(model.weight.abs().sum())
# tensor(14.3759, grad_fn=<SumBackward0>)
optimizer.step()
print(model.weight.abs().sum())
# tensor(97.4504, grad_fn=<SumBackward0>)
model.zero_grad()

# directly manipulate the parameter in-place without Autograd tracking it
with torch.no_grad():
    model.weight.copy_(torch.ones_like(model.weight))
print(model.weight)

# make sure the model is still updated afterwards
out = model(torch.randn(1, 10))
out.mean().backward()
print(model.weight.abs().sum())
# tensor(100., grad_fn=<SumBackward0>)
optimizer.step()
print(model.weight.abs().sum())
# tensor(100.3176, grad_fn=<SumBackward0>)
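
If you want to manipulate more than a single parameter, the same pattern works by iterating over model.parameters() inside the no_grad() context. This is just a minimal sketch; the small noise perturbation stands in for whatever in-place update you actually need:

# hypothetical example: apply an arbitrary in-place update to all parameters
with torch.no_grad():
    for param in model.parameters():
        # any in-place op works here, e.g. perturbing the values slightly
        param.add_(0.01 * torch.randn_like(param))

Note that in-place ops such as copy_ or add_ keep the same parameter tensor registered with the optimizer, while reassigning e.g. model.weight = nn.Parameter(...) would create a new tensor the already-created optimizer doesn’t know about.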