Not only about the error. I have few more doubts on how to do what I want? any help would be appreciated,
Goal: First to freeze all the non-zero weights in Network 1 then copy all the parameters in from Network 2 to Network except the frozen non-zero ones. Finally Network 1 will have it’s previous nonzero parameter freezed with no gradient (they shouldn’t change even I train the net again) and rest of them are from Network which has gradient as true
Questions: 1. Is there better way to do this?
2.How to avoid the error?
Network 1 has almost all zero’s except a few hundred non-zero parameters
class Network1(nn.Module):
"""
def __init__(self, save_features=None, bench_model=False):
super().__init__()
self.conv1 = nn.Conv2d(1, 10, 5, padding=0, bias=True)
self.conv2 = nn.Conv2d(10, 20, 5, bias=True)
self.fc1 = nn.Linear(320, 50)
self.fc2 = nn.Linear(50, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2)
x = F.relu(self.fc1(x.view(-1, 320)))
x = F.log_softmax(self.fc2(x), dim=1)
return x
Network 2 is nothing but newly initialized network,
class Network2(nn.Module):
"""
def __init__(self, save_features=None, bench_model=False):
super().__init__()
self.conv1 = nn.Conv2d(1, 10, 5, padding=0, bias=True)
self.conv2 = nn.Conv2d(10, 20, 5, bias=True)
self.fc1 = nn.Linear(320, 50)
self.fc2 = nn.Linear(50, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2)
x = F.relu(self.fc1(x.view(-1, 320)))
x = F.log_softmax(self.fc2(x), dim=1)
return x
Now first step I did is to freeze all the parameters of non-zeros.
model_intialize=Network2()
model_small=Network1()
for intialized_model,trained_model in zip(model_intialize.parameters(),model_small.parameters()):
#print(trained_model.shape,trained_model)
if trained_model is not None and len(trained_model.shape)>2:
#print(trained_model.shape,torch.sum((trained_model != 0).int()).data.item())
for i in range(trained_model.shape[0]):
#print(trained_model[i][0])
k=0
for j in trained_model[i][0]:
if(torch.sum((j != 0)).data.item()>0):
#print("+++++",len(torch.nonzero(j)),trained_model[i][0][k],'\n',intialized_model[i][0][k])
trained_model[i][0][k]=trained_model[i][0][k].detach()
else:
trained_model[i][0][k]=trained_model[i][0][k].add(intialized_model[i][0][k])
k=k+1
elif len(trained_model.shape)>1 and len(trained_model.shape)<2:
#print("++++++",trained_model,trained_model.shape)
#print("+++++++++++++",trained_model.shape,trained_model)
for trained,intialized in zip(trained_model,intialized_model):
if(torch.sum((trained != 0)).data.item()>0):
#print("----",torch.sum((trained != 0)).data.item(),trained)
trained=trained.detach()
else:
trained=trained.add(intialized)
The above for loop works well. But it’s throwing error after that
File "main.py", line 252, in train
loss.backward()
File "/home/jmandivarapu1/anaconda3/lib/python3.7/site-packages/torch/tensor.py", line 118, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "/home/jmandivarapu1/anaconda3/lib/python3.7/site-packages/torch/autograd/__init__.py", line 93, in backward
allow_unreachable=True) # allow_unreachable flag
Questions: 1. Is there better way to do this?
2.How to avoid the error?