You should be able to add your custom buffers to the corresponding param_group
via:
import torch
import torch.nn as nn

# setup
lin = nn.Linear(1, 1)
optimizer = torch.optim.Adam(lin.parameters(), lr=1e-3)
# dummy training (note: .backward() returns None, so don't assign its result)
out = lin(torch.randn(1, 1))
out.backward()
optimizer.step()
# check param_group
print(optimizer.param_groups[0])
# register custom tensor
optimizer.param_groups[0]['my_buffer'] = torch.tensor(100)
print(optimizer.state_dict())  # 'my_buffer' now appears under param_groups
# save the state_dict and restore it into a fresh optimizer
sd = optimizer.state_dict()
optimizer = torch.optim.Adam(lin.parameters(), lr=1e-3)
print(optimizer.state_dict())  # fresh optimizer: no 'my_buffer' yet
optimizer.load_state_dict(sd)
print(optimizer.state_dict())  # 'my_buffer' is restored
# dummy training continues with the restored optimizer
out = lin(torch.randn(1, 1))
out.backward()
optimizer.step()
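
Continuing the snippet above, a quick sanity check: load_state_dict keeps the current params but takes the saved group entries, so the custom key should come back on the new optimizer instance:

# verify the custom buffer survived the save/restore round trip
print(optimizer.param_groups[0]['my_buffer'])  # tensor(100)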