Gradients not calculated?

  # Calculate the loss
  loss = F.nll_loss(output, target)
  # Zero all existing gradients
  model.zero_grad()
  # Calculate gradients of model in backward pass
  loss.backward()
  # Collect the gradient of the input data
  data_grad = data.grad.data  # fails here: data.grad is None

I implemented the PyTorch tutorial on adversarial examples, which works fine with fully connected layers on MNIST, but when I use a convnet on CIFAR10, I get the error message: ‘NoneType’ object has no attribute ‘data’. It seems that the gradients are not being calculated.

Make sure to use requires_grad=True when creating your data input tensor.

PS: Don’t use the .data attribute, as it might have some unwanted side effects.
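
For example, a minimal sketch of that pattern (a standalone toy model and tensors, not your actual network):

   import torch
   import torch.nn.functional as F

   model = torch.nn.Linear(10, 3)                  # toy stand-in for the convnet
   data = torch.randn(1, 10, requires_grad=True)   # input created with requires_grad=True
   target = torch.tensor([2])

   output = F.log_softmax(model(data), dim=1)
   loss = F.nll_loss(output, target)
   model.zero_grad()
   loss.backward()
   data_grad = data.grad                           # read .grad directly instead of .grad.data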

# Loop over all examples in test set

for data, target in test_loader:

  # Send the data and label to the device
  data, target = data.to(device), target.to(device)
  # Set requires_grad attribute of tensor. Important for Attack
  data.requires_grad = True

  # Forward pass the data through the model
  output,lastlr,softlr = model(data,0,0)

  init_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability

  # If the initial prediction is wrong, don't bother attacking, just move on
  if init_pred.item() != target.item(): # works only if batch_size = 1
     continue

  # Calculate the loss
  loss = F.nll_loss(output, target)
  # Zero all existing gradients
  model.zero_grad()
  # Calculate gradients of model in backward pass
  loss.backward()
  # Collect datagrad
  data_grad = data.grad.data

Thanks for the quick reply, ptrblck. The line data.requires_grad = True is actually there.

What is data.requires_grad before you set it? Is it already True?
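
For example, a quick diagnostic you could drop into your test loop, right before the line that sets the flag:

   # check the flags of the batch coming out of the DataLoader
   print(data.requires_grad, data.is_leaf)
   data.requires_grad = True   # raises an error if data is not a leaf tensor

Keep in mind that .grad is only populated for leaf tensors that require grad.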

Can you share a small code sample that shows the problem and that we can run, please?

Hi Alban, here it is. It’s not very small though. Just change the directories and it should work. Thanks, Alex

import copy
import gc
import os
import matplotlib.pyplot as plt
import numpy as np
import random
import sys
import time
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import threading

# Constants
DATAPATH  = 'U:\\Boschdocs\\Python_dir\\Projects\\data'
TNETPATH  = 'U:\\Boschdocs\\Python_dir\\Projects\\Codeaidaden\\Source\\trainednet.pth'
CONSPATH  = 'U:\\Boschdocs\\Python_dir\\Projects\\Codeaidaden\\Source\\console.txt'
BATCHSIZE = 25
ESPACE    = " "
NETLOAD   = 1
NETSAVE   = 1
NLAYERS   = 8
MASTSC    =-0
SLOPETP   = 1.00
BIASBSE   = 0.00
BLRATE    = 0.001

# Globals
lrsize = np.zeros((NLAYERS,2),dtype=int)
dpinda = np.zeros((8,NLAYERS,2))
corcnt = np.zeros((NLAYERS,784))
cormat = np.zeros((NLAYERS,784,784))
insel  = np.zeros((NLAYERS,3))

# inits vars
time0 = time.clock(); dtime = 0
dloss = 0.0; daccur = 0.0; cnb = 0

# functions to show an image
def imshow(img):
   img = img / 2 + 0.5  # unnormalize
   npimg = img.numpy()
   plt.imshow(np.transpose(npimg, (1, 2, 0)))
   plt.show()

def Tanhact(x):
   x[x < 0] = 0
   return torch.tanh(SLOPETP * (x))

class Net(nn.Module):

   def __init__(self, x):

      super(Net, self).__init__()
      self.pool =  nn.MaxPool2d (2,2)
      self.conv1 = nn.Conv2d ( 3,12,5)
      self.conv2 = nn.Conv2d (12,16,5)
      self.fc5 =   nn.Linear (16*5*5,120)
      self.fc6 =   nn.Linear (120,84)
      self.fc7 =   nn.Linear ( 84,10)

      lrsize[0][0] =  3;     lrsize[0][1] =32
      lrsize[1][0] = 12;     lrsize[1][1] =28
      lrsize[2][0] = 12;     lrsize[2][1] =14
      lrsize[3][0] = 16;     lrsize[3][1] =10
      lrsize[4][0] = 16*5*5; lrsize[4][1] = 1
      lrsize[5][0] =120;     lrsize[5][1] = 1
      lrsize[6][0] = 84;     lrsize[6][1] = 1
      lrsize[7][0] = 10;     lrsize[7][1] = 1

      self.nx = np.empty(NLAYERS, dtype=object)
      self.nl = np.empty(NLAYERS, dtype=object)
      self.nl[1] = self.conv1
      self.nl[3] = self.conv2
      self.nl[5] = self.fc5
      self.nl[6] = self.fc6
      self.nl[7] = self.fc7
      self.forward(x,0,0)

   def forward(self, x0, opt, sitnx):

      # activation: Tanhact (alternative: torch.sigmoid)
      # opt == 0: read activations from the cached self.nx; opt == 1: read them from the supplied sitnx
      if opt == 0: xx = self.nx
      if opt == 1: xx = sitnx

      xx[0] = x0.detach()
      self.nx[1] = Tanhact(self.conv1(xx[0]))
      self.nx[2] = self.pool(xx[1])

      self.nx[3] = Tanhact(self.conv2(xx[2]))
      layeractar = self.pool(xx[3])
      self.nx[4] = layeractar.view(-1, 16*5*5)

      layeractar = self.fc5(xx[4])
      self.nx[5] = Tanhact(SLOPETP*layeractar)
      layeractar = self.fc6(xx[5])
      self.nx[6] = Tanhact(SLOPETP*layeractar)
      layeractar = self.fc7(xx[6])
      self.nx[7] = Tanhact(SLOPETP*layeractar)

      output = layeractar
      lastlr = self.nx[7]
      softlr = F.softmax(output, dim=1)  # apply softmax to the logits (nn.Softmax(output) only constructs a module)
      return output, lastlr, softlr

# FGSM attack code
def Fgsm_attack(image, epsilon, data_grad):
    # Collect the element-wise sign of the data gradient
    sign_data_grad = data_grad.sign()
    # Create the perturbed image by adjusting each pixel of the input image
    perturbed_image = image + epsilon*sign_data_grad
    # Adding clipping to maintain [0,1] range
    perturbed_image = torch.clamp(perturbed_image, 0, 1)
    # Return the perturbed image
    return perturbed_image

def test(model, device, test_loader, epsilon):

   # Accuracy counter
   correct = 0
   adv_examples = []

   # Loop over all examples in test set
   for data, target in test_loader:

      # Send the data and label to the device
      data, target = data.to(device), target.to(device)
      # Set requires_grad attribute of tensor. Important for Attack
      data.requires_grad = True

      # Forward pass the data through the model
      output,lastlr,softlr = model(data,0,0)

      init_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability

      # If the initial prediction is wrong, don't bother attacking, just move on
      if init_pred.item() != target.item(): # works only if batch_size = 1
         continue

      # Calculate the loss
      loss = F.nll_loss(output, target)
      # Zero all existing gradients
      model.zero_grad()
      # Calculate gradients of model in backward pass
      loss.backward()
      # Collect datagrad
      data_grad = data.grad.data

      # Call FGSM Attack
      perturbed_data = Fgsm_attack(data, epsilon, data_grad)
      # Re-classify the perturbed image
      output,lastlr,softlr = model(perturbed_data,0,0)  # forward() also needs the opt and sitnx arguments

      # Check for success
      final_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
      if final_pred.item() == target.item():
         correct += 1
         # Special case for saving 0 epsilon examples
         if (epsilon == 0) and (len(adv_examples) < 5):
            adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
            adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )
      else:
         # Save some adv examples for visualization later
         if len(adv_examples) < 5:
            adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
            adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )

   # Calculate final accuracy for this epsilon
   final_acc = correct/float(len(test_loader))
   print("Epsilon: {:.2f}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, len(test_loader), final_acc))

   # Return the accuracy and an adversarial example
   return final_acc, adv_examples

if __name__ == "__main__":
   device = torch.device("cpu")
   if os.path.isfile(CONSPATH): os.remove(CONSPATH)

   transform = transforms.Compose(
      [transforms.ToTensor(), transforms.Normalize(
      (0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

   trainset = torchvision.datasets.CIFAR10(root=DATAPATH, train=True,
      download=True, transform=transform)
   testset  = torchvision.datasets.CIFAR10(root=DATAPATH, train=False,
      download=True, transform=transform)

   trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCHSIZE,
      shuffle=True, num_workers=2)
   testloader  = torch.utils.data.DataLoader(testset,  batch_size=1,
      shuffle=True)
   validloader = torch.utils.data.DataLoader(testset,  batch_size=1000,
      shuffle=True)

   # get and show some images
   dataiter = iter(trainloader)
   inputs, labels = next(dataiter)
   #imshow(torchvision.utils.make_grid(inputs))
   # print labels
   #print(' '.join('%5s' % classes[labels[j]] for j in range(5)))

   net = Net(inputs); print(net)
   torch.manual_seed(0)

   criterion = nn.CrossEntropyLoss()
   optimizer = optim.SGD(net.parameters(), lr=BLRATE, momentum=0.9)

   for epoch in range(1):  # loop over the dataset multiple times

      for ii, data in enumerate(trainloader, 0):
         if (ii%50 == 0):
            print('Epoch {} | Iteration {}'.format(epoch, ii))
         # get the inputs; data is a list of [inputs, labels]
         inputs, labels = data

         # Backpropagation
         optimizer.zero_grad()
         output, lastlr, softlr = net.forward(inputs,0,0)
         loss = criterion(output, labels)
         loss.backward()
         optimizer.step()

         dloss += loss.item(); cnb += 1
         #output = (output > 0.5).float()
         output = output.float()
         bestind = torch.max(output, 1).indices
         for bi in range(BATCHSIZE):
            #if bestval[bi] < 0.5: bestind[bi] = -1
            if net.nx[7][bi][bestind[bi]] < 0.5: bestind[bi] = -1
         daccur += torch.mean((bestind == labels).float())

         # print console
         if ii%2000 == 0:
            # accuracy et al
            dtime = (time.clock() - time0)/60
            content = ""
            if os.path.isfile(CONSPATH):
               consfn = open(CONSPATH, "r")
               content = consfn.read()
               consfn.close(); os.remove(CONSPATH)
            consfn = open(CONSPATH, "w")
            consfn.write("\n Ep({:3d},{:d})" .format(epoch, int(ii/cnb)))
            consfn.write("   dloss:  {:7.3f}".format(dloss/cnb))
            consfn.write("    daccur:{:9.3f}".format(daccur/cnb))
            consfn.write("\n BIASBSE {:9.3f}".format(BIASBSE))
            consfn.write("\n last lr:")
            for ci in range(10):
               consfn.write(" {:8.3f} ".format(lastlr[0][ci]))
            consfn.write("\n output: ")
            for ci in range(10):
               consfn.write(" {:8.3f} ".format(output[0][ci]))

            consfn.write(" durat (min): {:4.1f} ___\n" .format(dtime))
            consfn.write(content)
            consfn.close()

            # reinits vars
            time0 = time.clock(); dtime = 0
            dloss = 0.0; daccur = 0.0; cnb = 0
            dpinda = np.zeros_like(dpinda)
            corcnt = np.zeros_like(corcnt)
            cormat = np.zeros_like(cormat)
            insel  = np.zeros_like(insel)
      gc.collect() # end epoch
   print('Finished Training')
   net.eval()

   accuracies = []; examples = []
   epsilons = [0, .05, .1, .15, .2, .25, .3]

   if 1 == 1:
      # Run test for each epsilon
      for eps in epsilons:
         acc, ex = test(net, device, testloader, eps)
         accuracies.append(acc)
         examples.append(ex)

      plt.figure(figsize=(5, 5))
      plt.plot(epsilons, accuracies, "*-")
      plt.yticks(np.arange(0, 1.1, step=0.1))
      plt.xticks(np.arange(0, .35, step=0.05))
      plt.title("Accuracy vs Epsilon")
      plt.xlabel("Epsilon")
      plt.ylabel("Accuracy")
      plt.show()

      # Plot several examples of adversarial samples at each epsilon
      cnt = 0
      plt.figure(figsize=(8, 10))
      for i in range(len(epsilons)):
         for j in range(len(examples[i])):
            cnt += 1
            plt.subplot(len(epsilons), len(examples[0]), cnt)
            plt.xticks([], [])
            plt.yticks([], [])
            if j == 0:
               plt.ylabel("Eps: {}".format(epsilons[i]), fontsize=14)
            orig, adv, ex = examples[i][j]
            plt.title("{} -> {}".format(orig, adv))
            plt.imshow(ex, cmap="gray")
      plt.tight_layout()
      plt.show()

I don’t have this data, so I cannot run it myself.
But isn’t it most likely that your data already requires grad?
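
If that is what's happening, one common pattern (a minimal sketch, not a verified fix for this exact code) is to re-wrap the batch as a fresh leaf tensor before the forward pass:

      # make `data` a fresh leaf tensor that records gradients for the attack
      data = data.clone().detach().requires_grad_(True)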