Grad is None for leaf variable

I’m trying to change input x according to its loss, but I found x’s grad was always none, so x can’t update. And x is also a leaf variable.
Here is my code.

def CBA(model, x, y=‘L’, k=5, iters=1000, batch_size=1):
“”" CBA (Classification Boundary Attack [targeted attack])
args:
model: victim model (logits outputs)
x: a single input
y: target label selection strategy (‘L’ and ‘R’)
k: threshold
iters: optimization budget
return:
optimized input x
“”"
x = Variable(x.clone().detach(), requires_grad=True)
relu = torch.nn.ReLU()
z = model.predict(x, batch_size=batch_size)[0]
i = np.argmax(z)
learning_rate = 0.001
if y == ‘L’: # least-like
j = np.argmin(z)
elif y == ‘R’: # random
ll = list(range(z.shape[0]))
ll.remove(i)
j = random.choice(ll)

optimizer = torch.optim.Adam([x], lr=learning_rate)
total_step = 0
loss_fn = lambda a, i, j, k, t: relu(a[i] - a[j] + k) + relu(a[t] - a[i])
for iter in range(iters):
    z = model.predict(x, batch_size=batch_size)[0]
    a = torch.from_numpy(z)
    a.requires_grad_(True)
    temp = z.copy()
    temp[i] = -1000
    temp[j] = -1000
    t = np.argmax(temp)  # max index except i,j
    optimizer.zero_grad()
    loss = loss_fn(a, i, j, k, t)
    loss.requires_grad_(True)
    loss.backward()
    optimizer.step()
    if (relu(a[i] - a[j] + k) + relu(a[t] - a[i])).detach().numpy() == 0:
        return x
    total_step += 1
    if total_step % 100 == 0:
        print(loss)

return x

if name == ‘main’:
dataset_name = ‘CIFAR10’
NUMS = 100 # NUMS: size of generated examples
args = parse_args()
defense_config = mlconfig.load(args.fp_config)
source_model: torch.nn.Sequential = defense_config.source_model()
optimizer = defense_config.optimizer(source_model.parameters())
source_model: PyTorchClassifier = __load_model(source_model,
optimizer,
image_size=defense_config.source_model.image_size,
num_classes=defense_config.source_model.num_classes,
filename=args.filename,
pretrained_dir=args.pretrained_dir)

valid_loader = defense_config.dataset(train=False)
advx = []
sum = 0
for data in valid_loader:
    imgs, targets = data
    imgs = imgs.to(device)
    targets = targets.to(device)
    rand_num = random.randint(0, 127)
    input = imgs[rand_num].unsqueeze(0)
    x = CBA(source_model, input, batch_size=1)
    advx.append(x[0])
    sum += 1
    if sum == 50:
        break

advx = np.array(advx)
advx_labels = nn.functional.one_hot(np.argmax(source_model.predict(advx), batch_size=50), 10)

# save the examples.
np.savez("./key_xy.npz", x_key=advx, y_key=advx_labels)
print("Key (x,y) saved.")

It seems you are using numpy in your model, which isn’t supported by Autograd and thus not differentiable:

    z = model.predict(x, batch_size=batch_size)[0]
    a = torch.from_numpy(z)
    a.requires_grad_(True)

Creating a tensor and calling .requires_grad_(True) on it won’t fix the issue since the numpy array is already detached from the computation graph.
Make sure to use PyTorch operations only or write a custom autograd.Function including the backward pass if you really need to use 3rd party libraries such as numpy.

Thank you! It helps me a lot.