FGSM, attribute error

I am trying to implement the FGSM (Fast Gradient Sign Method) attack.

What is the difference between

x = x.view(1, 1, 28, 28)
x.requires_grad = True

and

x.requires_grad = True
x = x.view(1, 1, 28, 28)

When I run the second snippet, x.grad is None at this line:

perturbation = epsilon*x.grad.data.sign()

so it raises an error:

AttributeError: 'NoneType' object has no attribute 'data'

This is my entire code

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import pytorch_lightning as pl
import os

class CNN(pl.LightningModule):
    """Small two-block convolutional classifier that emits 10 logits per sample.

    Each block is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2), so the
    spatial size is halved twice. The final linear layer expects 7*7*64
    features, i.e. a single-channel 28x28 input (28 -> 14 -> 7).
    """

    def __init__(self):
        super().__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.fc = nn.Linear(7*7*64, 10, bias=True)

        # Only the classifier weight is re-initialised; the conv layers keep
        # PyTorch's default initialisation.
        nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, 10)`` for input ``x``."""
        x = self.layer1(x)
        x = self.layer2(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one ``(x, y)`` batch."""
        x, y = batch
        output = self(x)
        loss = F.cross_entropy(output, y)
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (lr=1e-3)."""
        optimizer = optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """Return ``(predicted_classes, targets)`` for one ``(x, y)`` batch."""
        x, y = batch
        output = self(x)
        # Reduce over the class dimension only. A bare argmax flattens the
        # whole (batch, classes) tensor and yields a single meaningless index
        # whenever the batch holds more than one sample.
        pred = torch.argmax(output, dim=1)
        return (pred, y)


# ---------------------------------------------------------------------------
# Train the classifier on MNIST, then craft one FGSM adversarial example.
# ---------------------------------------------------------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
train = datasets.MNIST(os.getcwd(), train=True, transform=transforms.ToTensor(), download=True)
test = datasets.MNIST(os.getcwd(), train=False, transform=transforms.ToTensor(), download=True)
# Request a GPU only when one is available so the script also runs on CPU,
# consistent with the `device` fallback above.
trainer = pl.Trainer(max_epochs=5, gpus=1 if device == "cuda" else 0)
data_loader = DataLoader(train, batch_size=20, shuffle=True)
model = CNN()
trainer.fit(model, data_loader)

# Attack a single sample.
x, y = train[0]
# Reshape BEFORE enabling gradients: the tensor fed to the model must itself
# be a leaf tensor, otherwise x.grad stays None after backward().
x = x.to(device).view(1, 1, 28, 28)
x.requires_grad = True
y = torch.tensor([y], device=device)

criterion = nn.CrossEntropyLoss()
model = model.to(device)
model.eval()
model.zero_grad()
loss = criterion(model(x), y)
loss.backward()

epsilon = 0.5
# FGSM step: move every pixel by epsilon in the direction that increases the loss.
perturbation = epsilon * x.grad.sign()
# ToTensor() yields pixels in [0, 1]; clamp so the adversarial input remains
# a valid image, and detach it from the autograd graph.
adv_example = torch.clamp(x + perturbation, 0.0, 1.0).detach()
# Classify the adversarial example (not the bare perturbation).
with torch.no_grad():
    adv_prediction = torch.argmax(model(adv_example), dim=1).item()
print(f"output: {y.item()}, adv_prediction: {adv_prediction}")

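In the first snippet, `view` is called on a tensor that does not require gradients, so the reshaped `x` is itself a leaf tensor; setting `requires_grad = True` on it means `x.grad` is populated by `backward()`. In the second snippet, `requires_grad` is set on the original leaf tensor and `x` is then rebound to the result of `view`, which is a non-leaf tensor, so its `.grad` attribute won't be populated by default (the gradient is accumulated on the original leaf instead). If you need the gradient on the non-leaf tensor, call `x.retain_grad()` before `backward()`.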

Thanks for the reply!