Hi @smth,
I tried the method you provided with the following code, where I define my customized module lists (a simple LeNet example here) together with the forward and backward operations:
import torch
import torch.nn as nn

class LeNetLayerSplit(nn.Module):
    def __init__(self):
        super(LeNetLayerSplit, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4 * 4 * 50, 500)
        self.fc2 = nn.Linear(500, 10)
        self.criterion = nn.CrossEntropyLoss()
        self.module_list_0 = nn.ModuleList([self.conv1, nn.MaxPool2d(2, stride=2), nn.ReLU(),
                                            self.conv2, nn.MaxPool2d(2, stride=2), nn.ReLU()])
        self.module_list_1 = nn.ModuleList([self.fc1, self.fc2])
        self._name = "LeNet_layer_split"

    def forward(self, x, target):
        for sub_module in self.module_list_0:
            x = sub_module(x)
        x.detach_()  # cut the graph here; the gradient should be fed back manually in backward()
        x = x.view(-1, 4 * 4 * 50)
        for sub_module in self.module_list_1:
            x = sub_module(x)
        x.detach_()
        loss = self.criterion(x, target)
        return x, loss

    def backward(self, grad_output):
        # propagate the gradient manually through the two halves, in reverse order
        for m in reversed(self.module_list_1):
            grad_output = m.backward(grad_output)
        grad_output = grad_output.view(-1, 50, 4, 4)
        for n in reversed(self.module_list_0):
            grad_output = n.backward(grad_output)
        return grad_output
When calling this model, I used the following code:
def build_model(self):
    self.network = LeNetLayerSplit()
    # this is only used for test
    self.optimizer = torch.optim.SGD(self.network.parameters(), lr=self.lr, momentum=self.momentum)

def train(self, train_loader=None):
    self.network.train()
    # iterate over epochs
    for i in range(self.max_num_epochs):
        for batch_idx, (data, y_batch) in enumerate(train_loader):
            iter_start_time = time.time()
            data, target = Variable(data, requires_grad=True), Variable(y_batch)
            self.optimizer.zero_grad()
            logits, loss = self.network(data, target)
            print("Trial Loss: {}".format(loss.data[0]))
            print("Start Backward Prop Process: ")
            loss.backward()
But I get the error RuntimeError: there are no graph nodes that require computing gradients. I guess I am calling the backward function in the wrong way, but a simple search returns no related issue.
However, when I read the source code of the autograd Variable, I found in this line https://github.com/pytorch/pytorch/blob/master/torch/autograd/variable.py#L235 that results generated during the forward pass have requires_grad set to False when detach_ is called. Is the error caused by that? If so, how can I solve it? Could you give me more details about this?
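If I understand that line correctly, the behaviour I am hitting is roughly the one in this minimal sketch (the tensors and names here are made up just for illustration, and the error message is what I see on my PyTorch version):

    import torch
    from torch.autograd import Variable

    x = Variable(torch.randn(2, 3), requires_grad=True)
    y = x * 2        # y is still attached to the graph, y.requires_grad is True
    y.detach_()      # detaches y in place; y.requires_grad becomes False
    loss = y.sum()
    loss.backward()  # fails with the same "no graph nodes that require computing gradients" error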
Thanks a lot!