How to implement attacks with the ART toolbox
Hello everyone, I am a math student experimenting with attacks on a ResNet18-based classifier (adversarially trained with FastGradientMethod(…, eps=0.03)). So far everything has worked. Now I would like to try different attacks, for which I use the ART toolbox (https://adversarial-robustness-toolbox.readthedocs.io/en/stable/index.html).
Some attacks work (FastGradientMethod and ProjectedGradientDescentPyTorch; Wasserstein runs but is too slow), while the others fail with a TypeError. What do I have to modify?
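For context, the adversarial training I mean is, in spirit, something like the following sketch (not my actual training code, which lives in the rdnn package; x_train, y_train and the use of ART's AdversarialTrainer here are just illustrative):

from art.attacks.evasion import FastGradientMethod
from art.defences.trainer import AdversarialTrainer

# Sketch only: train on a mix of clean and FGM adversarial examples,
# using the same eps = 0.03 as in the attacks below.
fgm = FastGradientMethod(estimator=classifier, eps=0.03)
trainer = AdversarialTrainer(classifier, attacks=fgm, ratio=0.5)
trainer.fit(x_train, y_train, nb_epochs=10, batch_size=128)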
My code:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.datasets as datasets
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from torchvision import transforms
import numpy as np
from art.estimators.classification import PyTorchClassifier
import rdnn.torch
import matplotlib.pyplot as plt
transform_test = transforms.Compose([
    transforms.ToTensor(),
    # You might want to enable this later. For now, leave it commented for compatibility
    # with the TensorFlow version of this code.
    # transforms.Normalize(
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]
    # )
])
# Initialize the test datasets
test_dataset = rdnn.torch.CifarDataset(train=False, transform=transform_test)
x_test = test_dataset.get_x()
y_test = test_dataset.get_y()
print(len(test_dataset))
print("Loaded data.")
#################################################################################
# Instantiate the model architecture here (Net is my ResNet18-based class; don't forget to import it)
model = Net()
model.eval()
#################################################################################
path_to_checkpoint = "/cluster/work/math/robust-dnn/group5/krimmelm_2020-11-27_13.01.37_247514/ratio_0_6928571428571428"
optimizer, epoch, _ = rdnn.torch.load_model(path_to_checkpoint, model, from_shared_group_dir=False)
print("Loaded model.")
criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)
classifier = PyTorchClassifier(
    model=model,
    clip_values=None,
    loss=criterion,
    optimizer=optimizer,
    input_shape=(3, 32, 32),
    nb_classes=10,
)
model.eval()
from art.attacks.evasion import AutoAttack
from art.attacks.evasion import AutoProjectedGradientDescent
from art.attacks.evasion import FastGradientMethod
from art.attacks.evasion import ProjectedGradientDescentPyTorch
from art.attacks.evasion import SquareAttack
from art.attacks.evasion import TargetedUniversalPerturbation
from art.attacks.evasion import UniversalPerturbation
from art.attacks.evasion import Wasserstein
eps = 0.03
eps_step = eps/3
norm = np.inf
ATTACK_fun = [
    AutoAttack(estimator=classifier, eps=eps, eps_step=eps_step, norm=norm),
    AutoProjectedGradientDescent(estimator=classifier, eps=eps, eps_step=eps_step, norm=norm),
    FastGradientMethod(estimator=classifier, eps=eps, eps_step=eps_step, norm=norm),
    ProjectedGradientDescentPyTorch(estimator=classifier, eps=eps, eps_step=eps_step, norm=norm),
    SquareAttack(estimator=classifier, eps=eps, norm=norm),
    # TargetedUniversalPerturbation(classifier=classifier, eps=eps, norm=norm),
    # UniversalPerturbation(classifier=classifier, eps=eps, norm=norm),
    Wasserstein(estimator=classifier, eps=eps, eps_step=eps_step, norm=str(norm)),
]
for eval_attacks in ATTACK_fun:
    print(eval_attacks)
    x_test_adv = eval_attacks.generate(x_test)
    predictions = classifier.predict(x_test)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
    print("Accuracy on benign test examples: {}%".format(accuracy * 100))
    predictions_adv = classifier.predict(x_test_adv)
    accuracy_adv = np.sum(np.argmax(predictions_adv, axis=1) == y_test) / len(y_test)
    print("Accuracy on adversarial examples: {}%".format(accuracy_adv * 100))
My output
I commented out all attacks except one, to get the following output for each method:
ProjectedGradientDescentPyTorch:
<art.attacks.evasion.projected_gradient_descent.projected_gradient_descent_pytorch.ProjectedGradientDescentPyTorch object at 0x2ad31c19fdd0>
Accuracy on benign test examples: 81.53%
Accuracy on adversarial examples: 11.67%
--> THIS WORKS FINE :D
#################################################################################
#################################################################################
AutoProjectedGradientDescent:
<art.attacks.evasion.auto_projected_gradient_descent.AutoProjectedGradientDescent object at 0x2af9efb31c50>
Traceback (most recent call last):
  File "Attack_my-ResNet6_1.py", line 79, in <module>
    x_test_adv = eval_attacks.generate(x_test)
  File "/cluster/home/lgraz/RDNN/rdnn-env/lib64/python3.7/site-packages/art/attacks/attack.py", line 74, in replacement_function
    return fdict[func_name](self, *args, **kwargs)
  File "/cluster/home/lgraz/RDNN/rdnn-env/lib64/python3.7/site-packages/art/attacks/evasion/auto_projected_gradient_descent.py", line 465, in generate
    f_0 = self.estimator.loss(x=x_k, y=y_batch, reduction="mean")
  File "/cluster/home/lgraz/RDNN/rdnn-env/lib64/python3.7/site-packages/art/estimators/classification/pytorch.py", line 434, in loss
    return loss.detach().numpy()
TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
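If I read this correctly, ART's PyTorchClassifier.loss() calls loss.detach().numpy() on a tensor that is still on the GPU. Following the error message, two workarounds come to mind (both untested): edit that line in pytorch.py to loss.detach().cpu().numpy(), or build the classifier on the CPU via the device_type argument so no CUDA tensor ever reaches numpy, e.g.:

classifier = PyTorchClassifier(
    model=model,
    clip_values=None,
    loss=criterion,
    optimizer=optimizer,
    input_shape=(3, 32, 32),
    nb_classes=10,
    device_type="cpu",  # default is "gpu"; CPU avoids the cuda-to-numpy conversion
)

Running on the CPU is of course slower; maybe a newer ART release already fixes this upstream.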
#################################################################################
#################################################################################
SquareAttack:
<art.attacks.evasion.square_attack.SquareAttack object at 0x2b4b25502ad0>
Traceback (most recent call last):
  File "Attack_my-ResNet6_1.py", line 79, in <module>
    x_test_adv = eval_attacks.generate(x_test)
  File "/cluster/home/lgraz/RDNN/rdnn-env/lib64/python3.7/site-packages/art/attacks/attack.py", line 74, in replacement_function
    return fdict[func_name](self, *args, **kwargs)
  File "/cluster/home/lgraz/RDNN/rdnn-env/lib64/python3.7/site-packages/art/attacks/evasion/square_attack.py", line 166, in generate
    a_min=self.estimator.clip_values[0],
TypeError: 'NoneType' object is not subscriptable
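The trace shows SquareAttack indexing self.estimator.clip_values[0], and I pass clip_values=None when building the classifier above. Presumably I need real bounds; since I only apply ToTensor() (no normalization), the inputs should lie in [0, 1], so my guess at a fix (untested) is:

classifier = PyTorchClassifier(
    model=model,
    clip_values=(0.0, 1.0),  # guess: ToTensor() scales CIFAR images to [0, 1]
    loss=criterion,
    optimizer=optimizer,
    input_shape=(3, 32, 32),
    nb_classes=10,
)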
#################################################################################
#################################################################################
AutoAttack:
<art.attacks.evasion.auto_attack.AutoAttack object at 0x2b2959d2add0>
Traceback (most recent call last):
  File "Attack_my-ResNet6_1.py", line 79, in <module>
    x_test_adv = eval_attacks.generate(x_test)
  File "/cluster/home/lgraz/RDNN/rdnn-env/lib64/python3.7/site-packages/art/attacks/attack.py", line 74, in replacement_function
    return fdict[func_name](self, *args, **kwargs)
  File "/cluster/home/lgraz/RDNN/rdnn-env/lib64/python3.7/site-packages/art/attacks/evasion/auto_attack.py", line 176, in generate
    x_adv, sample_is_robust = self._run_attack(x=x_adv, y=y, sample_is_robust=sample_is_robust, attack=attack)
  File "/cluster/home/lgraz/RDNN/rdnn-env/lib64/python3.7/site-packages/art/attacks/evasion/auto_attack.py", line 222, in _run_attack
    x_robust_adv = attack.generate(x=x_robust, y=y_robust)
  File "/cluster/home/lgraz/RDNN/rdnn-env/lib64/python3.7/site-packages/art/attacks/attack.py", line 74, in replacement_function
    return fdict[func_name](self, *args, **kwargs)
  File "/cluster/home/lgraz/RDNN/rdnn-env/lib64/python3.7/site-packages/art/attacks/evasion/auto_projected_gradient_descent.py", line 465, in generate
    f_0 = self.estimator.loss(x=x_k, y=y_batch, reduction="mean")
  File "/cluster/home/lgraz/RDNN/rdnn-env/lib64/python3.7/site-packages/art/estimators/classification/pytorch.py", line 434, in loss
    return loss.detach().numpy()
TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
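This is the same loss-to-numpy conversion as in AutoProjectedGradientDescent above; the trace shows that AutoAttack calls AutoProjectedGradientDescent internally, so I assume the same workaround (CPU classifier or the .cpu() edit) would apply here.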
#################################################################################
#################################################################################
Thank you in advance