I performed adversarial training with Inception v1 (GoogLeNet), using FGSM to perturb the data. After training finished, the script reported: Got 3160/3564 correct (88.66%) on the clean data
My dataset has 15K samples in 5 categories, of which about 3.5K are the test data. However, when I attacked the adversarially trained model, the results were not promising.
Below is the code I used from this repo:
import os
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision.models as models
from torch.autograd import Variable
import torch.nn.functional as F
from adversarialbox.attacks import FGSMAttack, LinfPGDAttack
from adversarialbox.train import adv_train, FGSM_train_rnd
from adversarialbox.utils import to_var, pred_batch, test
# Hyper-parameters read by the data loaders and the training loop below.
param = dict(
    img_size=224,         # side length of the square crop fed to the network
    batch_size=64,        # mini-batch size of the training loader
    test_batch_size=16,   # mini-batch size of the validation loader
    num_epochs=30,        # number of passes over the training set
    delay=10,             # clean-only epochs before adversarial examples are mixed in
    learning_rate=1e-3,   # RMSprop learning rate
    weight_decay=5e-4,    # RMSprop weight decay
)
# Training pipeline: random-resized crop and horizontal flip for augmentation,
# then tensor conversion and per-channel normalization (the mean/std values
# torchvision documents for ImageNet-pretrained models).
_train_transform = transforms.Compose([
    transforms.RandomResizedCrop(param['img_size']),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
_train_dataset = datasets.ImageFolder(
    os.path.join('../torchex1/data/train'),
    transform=_train_transform,
)
train_loader = torch.utils.data.DataLoader(
    _train_dataset,
    batch_size=param['batch_size'],
    shuffle=True,
    num_workers=4,
)

# Validation pipeline: deterministic resize + centre crop, same normalization.
_val_transform = transforms.Compose([
    transforms.Resize(param['img_size']),
    transforms.CenterCrop(param['img_size']),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
_val_dataset = datasets.ImageFolder(
    os.path.join('../torchex1/data/val'),
    transform=_val_transform,
)
test_loader = torch.utils.data.DataLoader(
    _val_dataset,
    batch_size=param['test_batch_size'],
    shuffle=False,
    num_workers=4,
)
# Setup the model
# GoogLeNet (Inception v1) trained from scratch with a 5-class head.
# aux_logits=False drops the auxiliary classifier branches, so net(x) is a
# single logits tensor that can be passed straight to the criterion.
net = models.googlenet(pretrained=False, aux_logits=False, num_classes=5)
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # NOTE(review): the adversarialbox helpers (pred_batch, adv_train, test)
    # move tensors to the GPU on their own, presumably onto the *current* CUDA
    # device -- confirm against the library. Making `device` the current
    # device keeps their tensors on the same GPU as the model.
    torch.cuda.set_device(device)
net = net.to(device)
net.train()

# Adversarial training setup
# NOTE(review): the loaders Normalize() their images, so epsilon=0.3 is
# measured in normalized units rather than [0, 1] pixel units, and any
# [0, 1] clipping done inside FGSMAttack would distort normalized inputs.
# Confirm what FGSMAttack.perturb does with the value range, and use the same
# epsilon convention when attacking the trained model.
adversary = FGSMAttack(net, epsilon=0.3)
# Alternative attack: adversary = LinfPGDAttack()

# Train the model
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(
    net.parameters(),
    lr=param['learning_rate'],
    weight_decay=param['weight_decay'],
)

for epoch in range(param['num_epochs']):
    print('Starting epoch %d / %d' % (epoch + 1, param['num_epochs']))

    for t, (x, y) in enumerate(train_loader):
        # Move the batch explicitly to the model's device instead of relying
        # on a helper that may pick a different GPU.
        x_var, y_var = x.to(device), y.long().to(device)
        loss = criterion(net(x_var), y_var)

        # adversarial training (only after the clean warm-up epochs)
        if epoch + 1 > param['delay']:
            # use predicted label to prevent label leaking
            y_pred = pred_batch(x, net)
            x_adv = adv_train(x, y_pred, net, criterion, adversary)
            x_adv_var = x_adv.to(device)
            loss_adv = criterion(net(x_adv_var), y_var)
            # equal weighting of the clean and the adversarial loss
            loss = (loss + loss_adv) / 2

        if (t + 1) % 100 == 0:
            print('t = %d, loss = %.8f' % (t + 1, loss.item()))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Clean-data accuracy of the final model on the validation loader.
test(net, test_loader)

# Create the output directory first so a missing folder cannot discard the
# result of the whole training run.
os.makedirs('models', exist_ok=True)
torch.save(net.state_dict(), 'models/adv_trained_inception.pth')
I didn’t use the aux_logits branch
while training. Will that affect the results?
Could you help? What are the possible flaws in what I’m doing, or is my approach wrong?
Here is the attack script, taken from here:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, models, transforms
import numpy as np
import matplotlib.pyplot as plt
import os
# Perturbation magnitudes to evaluate; epsilon 0 adds no perturbation and
# serves as the un-attacked baseline.
epsilons = [0, .05, .1, .15, .2, .25, .3]
# NOTE(review): the training script saves its weights to
# 'models/adv_trained_inception.pth'; verify this path really points at the
# adversarially trained checkpoint and not at a different model.
pretrained_model = "googlenet/weight.pth"
use_cuda=True

# Same architecture as used for training: 5 classes, no auxiliary classifiers.
Net = models.googlenet(aux_logits=False, num_classes=5)

# Evaluation data: deterministic resize + centre crop (no augmentation),
# followed by the same normalization that was used during training.
# batch_size stays 1 because test() calls .item() on the per-batch prediction.
test_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(os.path.join('data/val'), transform=transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])),
    batch_size=1, shuffle=True, num_workers=4)

print("CUDA Available: ",torch.cuda.is_available())
device = torch.device("cuda:1" if (use_cuda and torch.cuda.is_available()) else "cpu")
model = Net.to(device)
# The checkpoint is deserialized on the CPU; load_state_dict then copies the
# values into the parameters that already live on `device`.
model.load_state_dict(torch.load(pretrained_model, map_location='cpu'))
# Switch to evaluation mode before attacking.
model.eval()
def fgsm_attack(image, epsilon, data_grad):
    """Return the FGSM-perturbed version of `image`.

    Every element of `image` is moved by `epsilon` in the direction of the
    sign of `data_grad`, and the result is clamped to [0, 1]. Because of that
    clamp the function is only meaningful for inputs that live in the [0, 1]
    range.

    Args:
        image: tensor to perturb.
        epsilon: step size of the perturbation.
        data_grad: gradient of the loss with respect to `image`.

    Returns:
        The perturbed tensor, with every element inside [0, 1].
    """
    step = data_grad.sign() * epsilon
    return (image + step).clamp(0, 1)
def test(model, device, test_loader, epsilon,
         mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Measure the accuracy of `model` under an FGSM attack of size `epsilon`.

    The loader yields normalized images, while fgsm_attack clamps its result
    to [0, 1]. Clamping a normalized tensor destroys the image (even for
    epsilon == 0), so each sample is first mapped back to [0, 1] pixel space,
    perturbed there, and then normalized again before it is re-classified.
    `epsilon` is therefore expressed in [0, 1] pixel units.

    Samples the model already misclassifies are not attacked; they still
    count in the denominator of the reported accuracy.

    Relies on the module-level `criterion` and `fgsm_attack`.

    Args:
        model: classifier under attack.
        device: device the batches are moved to.
        test_loader: loader yielding one (image, label) pair per batch;
            batch size must be 1 because predictions are read with .item().
        epsilon: FGSM step size in pixel units.
        mean: per-channel mean used by the loader's Normalize transform.
        std: per-channel std used by the loader's Normalize transform.

    Returns:
        (final_acc, adv_examples): the accuracy under attack and up to five
        (initial_pred, final_pred, image) tuples, where image is a NumPy array
        in [0, 1] pixel space suitable for plotting.
    """
    # Shape the statistics so they broadcast over (N, C, H, W) batches.
    mean_t = torch.as_tensor(mean, dtype=torch.float32, device=device).view(1, -1, 1, 1)
    std_t = torch.as_tensor(std, dtype=torch.float32, device=device).view(1, -1, 1, 1)

    # Accuracy counter
    correct = 0
    adv_examples = []

    # Loop over all examples in test set
    for data, target in test_loader:
        # Send the data and label to the device
        data, target = data.to(device), target.to(device)

        # The attack needs the gradient of the loss w.r.t. the input
        data.requires_grad = True

        # Forward pass the data through the model
        output = model(data)
        init_pred = output.max(1, keepdim=True)[1]  # index of the largest logit

        # If the initial prediction is wrong, dont bother attacking, just move on
        if init_pred.item() != target.item():
            continue

        # Gradient of the loss with respect to the (normalized) input
        loss = criterion(output, target)
        model.zero_grad()
        loss.backward()
        data_grad = data.grad.data

        # Undo the normalization so the [0, 1] clamp in fgsm_attack is valid.
        # std is positive, so the gradient sign is the same in both spaces.
        pixels = data.detach() * std_t + mean_t
        perturbed_pixels = fgsm_attack(pixels, epsilon, data_grad)
        perturbed_data = (perturbed_pixels - mean_t) / std_t

        # Re-classify the perturbed image; no gradients are needed here
        with torch.no_grad():
            output = model(perturbed_data)
        final_pred = output.max(1, keepdim=True)[1]

        still_correct = final_pred.item() == target.item()
        if still_correct:
            correct += 1

        # Keep up to five examples for visualization: the successful attacks,
        # plus the (unchanged) correct samples in the epsilon == 0 baseline.
        if len(adv_examples) < 5 and (not still_correct or epsilon == 0):
            adv_ex = perturbed_pixels.squeeze().detach().cpu().numpy()
            adv_examples.append((init_pred.item(), final_pred.item(), adv_ex))

    # Calculate final accuracy for this epsilon (guarding an empty loader)
    total = len(test_loader)
    final_acc = correct / float(total) if total else 0.0
    print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, total, final_acc))

    # Return the accuracy and an adversarial example
    return final_acc, adv_examples
# Loss that test() differentiates to obtain the input gradient for the attack.
criterion = nn.CrossEntropyLoss()

# Run test for each epsilon, then split the (accuracy, examples) pairs into
# two lists that stay index-aligned with `epsilons`.
results = [test(model, device, test_loader, eps) for eps in epsilons]
accuracies = [accuracy for accuracy, _ in results]
examples = [samples for _, samples in results]
# Accuracy-versus-epsilon curve, one marker per evaluated epsilon.
fig, ax = plt.subplots(figsize=(5, 5))
ax.plot(epsilons, accuracies, "*-")
ax.set_yticks(np.arange(0, 1.1, step=0.1))
ax.set_xticks(np.arange(0, .35, step=0.05))
ax.set_title("Accuracy vs Epsilon")
ax.set_xlabel("Epsilon")
ax.set_ylabel("Accuracy")
plt.show()
# Grid of saved examples: one row per epsilon, one column per example.
# Class index -> label, built once and named so it does not shadow builtin map().
class_names = {0: 'bus', 1: 'pickup', 2: 'sedan', 3: 'truck', 4: 'van'}
n_rows = len(epsilons)
# Size the grid by the longest row so a short row cannot shift later rows.
n_cols = max((len(row) for row in examples), default=0)

plt.figure(figsize=(8,10))
for i, (eps, row) in enumerate(zip(epsilons, examples)):
    for j, (orig, adv, ex) in enumerate(row):
        # Explicit (row, column) position instead of a running counter.
        plt.subplot(n_rows, n_cols, i * n_cols + j + 1)
        plt.xticks([], [])
        plt.yticks([], [])
        if j == 0:
            plt.ylabel("Eps: {}".format(eps), fontsize=14)
        plt.title("{} -> {}".format(class_names[orig], class_names[adv]))
        # Saved examples are channel-first arrays already clamped to [0, 1] by
        # fgsm_attack; imshow expects channel-last, so only the axes move.
        ex = ex.transpose((1, 2, 0))
        plt.imshow(ex)
plt.tight_layout()
plt.show()