@ptrblck's solution requires you to specify the full names of the parameters whose lr you want to change. I wrote a recursive solution that lets you use just the submodule path.
The code:
```python
from pprint import pprint
from typing import Dict

from torchvision import models


def group_wise_lr(model, group_lr_conf: Dict, path=""):
    """
    Refer https://pytorch.org/docs/master/optim.html#per-parameter-options

    This turns

        torch.optim.SGD([
            {'params': model.base.parameters()},
            {'params': model.classifier.parameters(), 'lr': 1e-3}
        ], lr=1e-2, momentum=0.9)

    into

        cfg = {"classifier": {"lr": 1e-3},
               "lr": 1e-2, "momentum": 0.9}
        confs, names = group_wise_lr(model, cfg)
        torch.optim.SGD(confs, lr=1e-2, momentum=0.9)

    :param model: the model (or submodule, during recursion) whose parameters are being grouped
    :param group_lr_conf: nested dict mapping submodule names to optimiser options
    :param path: dotted prefix of the current submodule, used internally during recursion
    :return: (confs, names) - the per-parameter option dicts and the parameter names they cover
    """
    assert type(group_lr_conf) == dict
    confs = []
    nms = []
    for kl, vl in group_lr_conf.items():
        assert type(kl) == str
        assert type(vl) == dict or type(vl) == float or type(vl) == int

        if type(vl) == dict:
            # Recurse into the named submodule and prefix the returned parameter names.
            assert hasattr(model, kl)
            cfs, names = group_wise_lr(getattr(model, kl), vl, path=path + kl + ".")
            confs.extend(cfs)
            names = list(map(lambda n: kl + "." + n, names))
            nms.extend(names)

    # Options given at this level (e.g. "lr") apply to all params not covered by a submodule entry.
    primitives = {kk: vk for kk, vk in group_lr_conf.items() if type(vk) == float or type(vk) == int}
    remaining_params = [(k, p) for k, p in model.named_parameters() if k not in nms]
    if len(remaining_params) > 0:
        names, params = zip(*remaining_params)
        conf = dict(params=params, **primitives)
        confs.append(conf)
        nms.extend(names)

    # Sanity checks: every parameter is covered exactly once.
    plen = sum([len(list(c["params"])) for c in confs])
    assert len(list(model.parameters())) == plen
    assert set(list(zip(*model.named_parameters()))[0]) == set(nms)
    assert plen == len(nms)

    if path == "":
        # At the top level, expose each group's params as a generator.
        for c in confs:
            c["params"] = (n for n in c["params"])
    return confs, nms


if __name__ == "__main__":
    model = models.resnet18(pretrained=True)

    test_configs = [
        # Give the same LR to all model params
        {"lr": 0.3},

        # For the cases below that have no top-level lr, you will need to pass overall optimiser params
        # (defaults) to the optimiser for the remaining model params, since we did not specify optimiser
        # params for all top-level submodules (see the sketch after this code).
        # Refer https://pytorch.org/docs/master/optim.html#per-parameter-options

        # Give one LR to layer4 only
        {"layer4": {"lr": 0.3}},

        # Give one LR to layer4 and another to the rest of the model. We can do this recursively too.
        {"layer4": {"lr": 0.3},
         "lr": 0.5},

        # Give one LR to layer4.0 and another to the rest of layer4
        {"layer4": {"0": {"lr": 0.001},
                    "lr": 0.3}},

        # More examples
        {"layer4": {"lr": 0.3,
                    "0": {"lr": 0.001}}},

        {"layer3": {"0": {"conv2": {"lr": 0.001}},
                    "1": {"lr": 0.003}}},

        {"layer4": {"lr": 0.3},
         "layer3": {"0": {"conv2": {"lr": 0.001}},
                    "lr": 0.003},
         "lr": 0.001}
    ]

    for cfg in test_configs:
        confs, names = group_wise_lr(model, cfg)
        print("#" * 140)
        pprint(cfg)
        print("-" * 80)
        pprint(confs)
        print("#" * 140)
```
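As the comment in the test configs notes, when a config leaves some groups without an explicit lr, the optimiser itself has to supply the defaults. A minimal sketch of that (my own illustration; the 0.01 default lr and 0.9 momentum are arbitrary values, not something the function requires):

```python
import torch
from torchvision import models

model = models.resnet18(pretrained=True)

# Only layer4 gets an explicit lr; every other parameter lands in a group
# without an 'lr' key, so SGD's own default lr (0.01 here) applies to it.
confs, names = group_wise_lr(model, {"layer4": {"lr": 0.3}})
optimizer = torch.optim.SGD(confs, lr=0.01, momentum=0.9)
```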
Usage:

Suppose you have the resnet18 model from torchvision and you want to change the LR of layer4, layer3.1 and layer3.0.conv2:

```python
from torchvision import models

model = models.resnet18(pretrained=True)
confs, names = group_wise_lr(model, {"layer4": {"lr": 0.3},
                                     "layer3": {"0": {"conv2": {"lr": 0.001}},
                                                "1": {"lr": 0.003}}})
# Notice that the config mirrors the module hierarchy: we only go down the hierarchy as far as we need, not further.
```
This generates `confs` as:

```python
[{'lr': 0.3,
  'params': <generator object group_wise_lr.<locals>.<genexpr> at 0x11f4a84a0>},
 {'lr': 0.001,
  'params': <generator object group_wise_lr.<locals>.<genexpr> at 0x11f4a8510>},
 {'params': <generator object group_wise_lr.<locals>.<genexpr> at 0x11f4a8580>},
 {'lr': 0.003,
  'params': <generator object group_wise_lr.<locals>.<genexpr> at 0x11f4a85f0>},
 {'params': <generator object group_wise_lr.<locals>.<genexpr> at 0x11f4a87b0>}]
```
The last `{'params': <generator>}` group has all the model params for which we did not specify any lr. The third `{'params': <generator>}` group has all the params of layer3.0 that aren't in its conv2 submodule. This lets you pass these confs to an optimiser and do fine-grained LR tuning in a hierarchical manner.
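For completeness, a minimal sketch of that last step (my own illustration; the default lr here is an arbitrary value for the groups that carry no explicit lr):

```python
import torch

# The two groups without an explicit 'lr' (the rest of the model and the rest
# of layer3.0) fall back to the optimiser's default lr (0.01 here).
optimizer = torch.optim.SGD(confs, lr=0.01)
```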