ValueError: can't optimize a non-leaf Tensor

The following code is meant to create an image and to allow backpropagation through the method used to create that image.

The code is meant to be used with a pretrained CNN like this:

When I run the code below:

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

def create_input(input_size, mode=0):
    """Build a 2-channel coordinate grid to feed into a CPPN.

    Args:
        input_size: ``(height, width)`` as a tuple/list, or a single int for a
            square grid. In mode 1 only the first entry is used, so the grid
            is always square.
        mode: ``0`` gives coordinates normalized to ``[-0.5, 0.5)`` along each
            axis; ``1`` gives the TensorFlow/Lucid layout, evenly spaced in
            ``[-sqrt(3), sqrt(3)]``.

    Returns:
        A float tensor of shape ``(1, 2, height, width)``. Channel 0 varies
        along the width axis and channel 1 along the height axis.

    Raises:
        ValueError: if ``mode`` is neither 0 nor 1.
    """
    if mode == 0:
        if not isinstance(input_size, (tuple, list)):
            input_size = (input_size, input_size)
        height, width = input_size[0], input_size[1]
        # Normalize each axis by its OWN length so that non-square grids are
        # also centred on zero and span [-0.5, 0.5).
        xs = torch.arange(0, width).true_divide(width) - 0.5
        ys = torch.arange(0, height).true_divide(height) - 0.5
        x_grid = xs.unsqueeze(0).expand(height, width)
        y_grid = ys.unsqueeze(1).expand(height, width)
        return torch.stack((x_grid, y_grid), 0).unsqueeze(0)
    if mode == 1:  # TensorFlow/Lucid creation method
        if isinstance(input_size, (tuple, list)):
            input_size = input_size[0]
        r = 3.0 ** 0.5
        coord_range = torch.linspace(-r, r, input_size)
        # Same values as an 'ij'-indexed meshgrid, built with expand so the
        # result does not depend on torch.meshgrid's default indexing.
        x_grid = coord_range.unsqueeze(0).expand(input_size, input_size)
        y_grid = coord_range.unsqueeze(1).expand(input_size, input_size)
        return torch.stack((x_grid, y_grid), 0).unsqueeze(0)
    raise ValueError("create_input: mode must be 0 or 1, got {!r}".format(mode))

def cppn_image(size, num_channels=16, num_layers=9):
    """Render an image by pushing a coordinate grid through random 1x1 convs.

    The grid from ``create_input(size)`` goes through one 2->num_channels
    layer, ``num_layers`` num_channels->num_channels layers (all followed by
    tanh), and a final num_channels->3 layer followed by a sigmoid.

    Args:
        size: grid size, forwarded unchanged to ``create_input``.
        num_channels: width of the hidden layers.
        num_layers: number of hidden num_channels->num_channels layers.

    Returns:
        The 3-channel output tensor. It is produced by differentiable
        operations on ``nn.Parameter`` objects, so it is a non-leaf tensor.
        The weights themselves are local to this function and not returned.
    """
    # (out_channels, in_channels) of every tanh layer, in application order.
    tanh_layer_dims = [(num_channels, 2)] + [(num_channels, num_channels)] * num_layers

    activations = nn.Parameter(create_input(size))
    for out_ch, in_ch in tanh_layer_dims:
        kernel = nn.Parameter(torch.randn(out_ch, in_ch, 1, 1))
        activations = torch.tanh(F.conv2d(activations, kernel))

    rgb_kernel = nn.Parameter(torch.randn(3, num_channels, 1, 1))
    return torch.sigmoid(F.conv2d(activations, rgb_kernel))


# cppn_image returns the result of conv2d/tanh/sigmoid operations, so `img`
# is a non-leaf tensor (it has a grad_fn).
img = cppn_image((512,460))
# This is the line that raises "ValueError: can't optimize a non-leaf Tensor":
# `img` was computed from other tensors, and the optimizer rejects it.
optimizer = optim.Adam([img])

I get this error message:

    raise ValueError("can't optimize a non-leaf Tensor")
ValueError: can't optimize a non-leaf Tensor

What am I doing wrong here? And how can I fix it?

Hi,

Why do you pass the image to an optimizer?
Keep in mind that optimizer’s steps are not differentiable and no gradients will be able to flow back.

@albanD

Why do you pass the image to an optimizer?

I’m using this setup for things like neural style transfer and DeepDream visualization, which requires a single image:

    # Wrap the image in a Parameter so the optimizer below can update it.
    img = nn.Parameter(img)
    num_iterations = 1000

    # One-element list so the closure can increment the counter in place.
    num_calls = [0]
    # Closure passed to optimizer.step(): clears gradients, runs the forward
    # pass, sums the per-layer losses, backpropagates and returns the loss.
    # `net`, `loss_list` and `learning_rate` are defined outside this snippet.
    def feval():
        num_calls[0] += 1
        optimizer.zero_grad()
        net(img) # Run input through CNN
        loss = 0

        for mod in loss_list:
            loss += mod.loss # Get loss from layers with nn.MSELoss() loss variable

        loss.backward()

        print('Iteration', num_calls[0], 'loss', loss.item())
        return loss

    optimizer = optim.Adam([img], lr = learning_rate)
    # NOTE(review): with `<=` and a counter starting at 0, an optimizer that
    # calls the closure once per step runs num_iterations + 1 times - confirm
    # this is intended.
    while num_calls[0] <= num_iterations:
         optimizer.step(feval)

This strategy was done a lot in the original Torch7, and “feval” was what the “closure” function was called back then.

Keep in mind that optimizer’s steps are not differentiable and no gradients will be able to flow back.

So, how would I make this mini network that I created using Functional.Conv2d work then?

    # Excerpt of the cppn_image body; `tensor`, `num_channels` and `num_layers`
    # come from the surrounding function.
    # First layer: 2 coordinate channels -> num_channels, followed by tanh.
    weight_val = nn.Parameter(torch.randn(num_channels,2,1,1))
    tensor = F.conv2d(tensor, weight_val).tanh()
    # Hidden layers: each iteration draws a fresh random 1x1 kernel and
    # rebinds weight_val, so the previous kernel is no longer referenced here.
    for i in range(num_layers):
        weight_val = nn.Parameter(torch.randn(num_channels,num_channels,1,1))
        tensor = F.conv2d(tensor, weight_val).tanh()
    # Output layer: num_channels -> 3 channels, squashed by a sigmoid.
    weight_val = nn.Parameter(torch.randn(3,num_channels,1,1))
    tensor = F.conv2d(tensor, weight_val).sigmoid()

I was originally using a class (with nn.Conv2d), but that didn’t seem to work.

These two classes do the same thing as the cppn_image() function above, but I don’t think that I can use them for what I am trying to do?

class CPPN_Conv(nn.Module):
    """CPPN made of 1x1 convolutions that maps a coordinate grid to an image.

    The convolution weights are registered module parameters, so they can be
    handed to an optimizer through ``module.parameters()``. Calling the module
    re-renders the image from the current weights.

    Args:
        size: grid size, forwarded unchanged to ``create_input``.
        num_channels: number of channels in the hidden layers.
        num_layers: number of conv + activation stages before the output layer.
        activ_func: activation module inserted after every hidden convolution.
            The same instance is reused for every stage.
    """

    def __init__(self, size, num_channels=16, num_layers=6, activ_func=nn.Tanh()):
        super(CPPN_Conv, self).__init__()
        self.size = size
        self.net = self.create_net(num_channels, num_layers, activ_func)

    def create_net(self, num_channels, num_layers, activ_func):
        """Assemble the conv stack and give its conv weights a normal init."""
        layers = [nn.Conv2d(in_channels=2, out_channels=num_channels, kernel_size=1), activ_func]
        for _ in range(num_layers - 1):
            layers.append(nn.Conv2d(in_channels=num_channels, out_channels=num_channels, kernel_size=1))
            layers.append(activ_func)
        layers.append(nn.Conv2d(in_channels=num_channels, out_channels=3, kernel_size=1))
        layers.append(nn.Sigmoid())
        net = nn.Sequential(*layers)
        net.apply(self.cppn_normal)
        return net

    def cppn_normal(self, l):
        """Draw the weights of every Conv2d layer from a standard normal."""
        if isinstance(l, nn.Conv2d):
            nn.init.normal_(l.weight)

    def forward(self):
        """Render the image from the current weights.

        Returns:
            The output of the conv stack applied to ``create_input(self.size)``.
        """
        coords = create_input(self.size)
        # The grid is created on CPU in the default dtype; move it to wherever
        # the weights live so the module still works after .to(device)/.double().
        reference = self.net[0].weight
        coords = coords.to(device=reference.device, dtype=reference.dtype)
        return self.net(coords)
		

# Build the module for size (512, 460) and run one forward pass.
# `img` is an output of the network, not a leaf; the trainable leaves are
# the tensors in cppn_c.parameters().
cppn_c = CPPN_Conv((512,460))
img = cppn_c()



class CPPN_Linear(nn.Module):
    """CPPN made of linear layers that maps per-pixel coordinates to RGB.

    The layer weights are registered module parameters, so they can be handed
    to an optimizer through ``module.parameters()``. Calling the module
    re-renders the image from the current weights.

    Args:
        size: ``(height, width)`` as a tuple/list, or a single int for a
            square image.
        num_channels: width of the hidden layers.
        num_layers: number of linear + activation stages before the output
            layer.
        activ_func: activation module inserted after every hidden layer. The
            same instance is reused for every stage.
    """

    def __init__(self, size, num_channels=16, num_layers=6, activ_func=nn.Tanh()):
        super(CPPN_Linear, self).__init__()
        # Accept lists as well as tuples; anything else is one side length.
        if isinstance(size, (tuple, list)):
            size = tuple(size)
        else:
            size = (size, size)
        self.input_size = size
        self.net = self.create_net(num_channels, num_layers, activ_func)

    def create_net(self, num_channels, num_layers, activ_func):
        """Assemble the linear stack and give its weights a normal init."""
        layers = [nn.Linear(2, num_channels, bias=True), activ_func]
        for _ in range(num_layers - 1):
            layers.append(nn.Linear(num_channels, num_channels, bias=False))
            layers.append(activ_func)
        layers.append(nn.Linear(num_channels, 3, bias=False))
        layers.append(nn.Sigmoid())
        net = nn.Sequential(*layers)
        net.apply(self.cppn_normal)
        return net

    def cppn_normal(self, l):
        """Draw the weights of every Linear layer from a standard normal."""
        if isinstance(l, nn.Linear):
            nn.init.normal_(l.weight)

    def create_input(self, input_size):
        """Return one (x, y) coordinate pair per pixel.

        Args:
            input_size: ``(height, width)``.

        Returns:
            A float tensor of shape ``(width * height, 2)`` with both
            coordinates in ``[-0.5, 0.5)``. Rows are ordered width-major,
            which is the layout ``forward`` reshapes back from.
        """
        height, width = input_size[0], input_size[1]
        # Normalize each axis by its OWN length so that non-square images are
        # also centred on zero and span [-0.5, 0.5).
        xs = torch.arange(0, width).true_divide(width) - 0.5
        ys = torch.arange(0, height).true_divide(height) - 0.5
        x_grid = xs.unsqueeze(1).expand(width, height)
        y_grid = ys.unsqueeze(0).expand(width, height)
        # nn.Linear requires a different input shape: one row per pixel.
        return torch.stack((x_grid, y_grid), -1).reshape(width * height, 2)

    def forward(self):
        """Render the image from the current weights.

        Returns:
            A tensor of shape ``(1, 3, height, width)`` with values in [0, 1].
        """
        height, width = self.input_size[0], self.input_size[1]
        coords = self.create_input(self.input_size)
        # The grid is created on CPU in the default dtype; move it to wherever
        # the weights live so the module still works after .to(device)/.double().
        reference = self.net[0].weight
        coords = coords.to(device=reference.device, dtype=reference.dtype)
        pixels = self.net(coords)
        return pixels.reshape(width, height, 3).permute(2, 1, 0).unsqueeze(0)
        
      
# Build the module for size (512, 460) and run one forward pass.
# `img` is an output of the network, not a leaf; the trainable leaves are
# the tensors in cppn_l.parameters().
cppn_l = CPPN_Linear((512,460))
img = cppn_l()

You can do the same as the example you show here.
In this example, img is a leaf (it was not created in a differentiable manner; it is just a Tensor that you optimize over), so it will also work with PyTorch if you do that.