I am trying to implement an FGSM (Fast Gradient Sign Method) attack.
What is the difference between
x = x.view(1, 1, 28, 28)
x.requires_grad = True
and
x.requires_grad = True
x = x.view(1, 1, 28, 28)
When I run the second version, x.grad is None at this line:
perturbation = epsilon*x.grad.data.sign()
so that line raises an error:
AttributeError: 'NoneType' object has no attribute 'data'
This is my entire code:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import pytorch_lightning as pl
import os
class CNN(pl.LightningModule):
    """Two-block convolutional classifier producing 10 class logits.

    Each block is Conv2d(3x3, padding 1) -> ReLU -> MaxPool2d(2), so the
    spatial size is halved twice.  The final linear layer expects
    ``7 * 7 * 64`` features, i.e. single-channel 28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.fc = nn.Linear(7 * 7 * 64, 10, bias=True)
        nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        """Return raw logits of shape ``(batch, 10)`` for input ``x``."""
        x = self.layer1(x)
        x = self.layer2(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one ``(x, y)`` batch."""
        x, y = batch
        output = self(x)
        loss = F.cross_entropy(output, y)
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (lr=1e-3)."""
        optimizer = optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """Return ``(predicted_classes, y)`` for one ``(x, y)`` batch.

        ``predicted_classes`` has shape ``(batch,)``: one class index per
        sample.
        """
        x, y = batch
        output = self(x)
        # argmax over the class dimension.  Without ``dim`` the logits are
        # flattened first, which yields a single index into the whole
        # (batch, 10) tensor and is only meaningful for a batch of one.
        pred = torch.argmax(output, dim=1)
        return (pred, y)
# Use the GPU when one is present; everything below also works on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
train = datasets.MNIST(os.getcwd(), train=True, transform=transforms.ToTensor(), download=True)
test = datasets.MNIST(os.getcwd(), train=False, transform=transforms.ToTensor(), download=True)
# Only request a GPU from the trainer when one actually exists, so the
# script does not fail on CPU-only machines.
trainer = pl.Trainer(max_epochs=5, gpus=1 if device == "cuda" else 0)
data_loader = DataLoader(train, batch_size=20, shuffle=True)
model = CNN()
trainer.fit(model, data_loader)

# ---- FGSM attack on a single training image ----
x, y = train[0]
# Move and reshape FIRST, then enable gradients.  ``view`` returns a new
# tensor; if ``requires_grad`` were set before the reshape, the reshaped
# tensor would be a non-leaf result of an autograd op, and ``backward()``
# only populates ``.grad`` on leaf tensors -- leaving ``x.grad`` as None.
x = x.to(device).view(1, 1, 28, 28)
x.requires_grad = True

criterion = nn.CrossEntropyLoss()
model = model.to(device)
model.eval()
model.zero_grad()

res = model(x)
y = torch.tensor([y]).to(device)
loss = criterion(res, y)
loss.backward()

epsilon = 0.5
# Step in the direction that increases the loss: epsilon * sign(dL/dx).
perturbation = epsilon * x.grad.sign()
# ToTensor() yields pixels in [0, 1]; clamp so the adversarial image stays
# in that range, and detach it from the attack's autograd graph.
adv_example = torch.clamp(x + perturbation, 0.0, 1.0).detach()
adv_examle = adv_example  # original (misspelled) name kept as an alias

# Classify the adversarial image itself, not the bare perturbation.
with torch.no_grad():
    adv_prediction = torch.argmax(model(adv_example))
print(f"output: {y.item()}, adv_prediction: {adv_prediction}")