@ptrblck you are right. I get the below error when I call `.backward()`

only on the `adv_loss`

```
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-57-6c7629d37758> in <module>()
13
14 for e in range(num_epochs):
---> 15 clsTrain_loss, clsTrain_acc, advTrain_loss, advTrain_acc = laftr_epoch(encoder, classifier, adversary, X_train, y_train, a_train, en_opt, cls_opt, adv_opt, cls_criterion, adv_criterion)
16
17 if e % 10 == 0:
<ipython-input-53-1a2ea661e046> in laftr_epoch(encoder, classifier, adversary, X, y_cls, y_adv, opt_en, opt_cls, opt_adv, cls_criterion, adv_criterion, batch_size)
33 # cls_en_combinedLoss = cls_loss + adv_loss_fixed
34 cls_en_combinedLoss = adv_loss_fixed
---> 35 cls_en_combinedLoss.backward()
36 # print(cls_en_combinedLoss.grad_fn)
37 opt_cls.step()
~/anaconda/envs/pytorch/lib/python3.6/site-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
91 products. Defaults to ``False``.
92 """
---> 93 torch.autograd.backward(self, gradient, retain_graph, create_graph)
94
95 def register_hook(self, hook):
~/anaconda/envs/pytorch/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
88 Variable._execution_engine.run_backward(
89 tensors, grad_tensors, retain_graph, create_graph,
---> 90 allow_unreachable=True) # allow_unreachable flag
91
92
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
```

Basically, I want my `classifier+encoder`

to take a gradient step while the `adversary`

is fixed *minimising* the combined loss. Then the `adversary`

will take a step with the `classifier + encoder`

fixed *maximising* the combined loss. What would be the best way to go about achieving this?

My training loop code is below. Any comments on best practices for doing stuff like this is also greatly appreciated! Thanks.

```
def laftr_epoch(encoder, classifier, adversary, X, y_cls, y_adv, opt_en, opt_cls, opt_adv, cls_criterion, adv_criterion, batch_size=64):
    """Run one adversarial-training (LAFTR-style) epoch.

    Alternates per mini-batch between:
      1. a minimising step on encoder+classifier with the adversary held fixed, and
      2. a maximising step on the adversary with encoder+classifier held fixed.

    Args:
        encoder, classifier, adversary: the three sub-networks (nn.Module).
        X: pandas DataFrame of inputs (rows are examples).
        y_cls, y_adv: numpy arrays of classifier / adversary targets.
        opt_en, opt_cls, opt_adv: one optimizer per sub-network.
        cls_criterion: loss(y_hat, y_cls).
        adv_criterion: loss(a_hat, y_adv, y_cls).
        batch_size: mini-batch size.

    Returns:
        (mean combined cls+en loss, mean cls accuracy,
         mean combined adv loss, mean adv accuracy) over the epoch.
    """
    cls_en_combinedLosses = []
    cls_en_accs = []
    adv_combinedLosses = []
    adv_accs = []
    for beg_i in range(0, X.shape[0], batch_size):
        x_batch = X.iloc[beg_i:beg_i + batch_size].values
        y_cls_batch = y_cls[beg_i:beg_i + batch_size]
        y_adv_batch = y_adv[beg_i:beg_i + batch_size]
        x_batch = torch.from_numpy(x_batch).to(device).float()
        y_cls_batch = torch.from_numpy(y_cls_batch).to(device).float()
        y_adv_batch = torch.from_numpy(y_adv_batch).to(device).float()

        # --- Step 1: update encoder + classifier, adversary fixed ---
        encoder.train()
        classifier.train()
        adversary.eval()
        z = encoder(x_batch)
        y_hat = classifier(z)
        # BUG FIX: do NOT wrap this forward pass in torch.no_grad().
        # Gradients must flow *through* the adversary back into the encoder;
        # no_grad() severed the graph, which is what caused
        # "element 0 of tensors does not require grad and does not have a grad_fn".
        # The adversary's own parameters do accumulate gradients here, but
        # "fixed" is enforced by only stepping opt_cls/opt_en, and
        # opt_adv.zero_grad() below discards those stale grads before its step.
        a_fixed = adversary(z)
        opt_cls.zero_grad()
        opt_en.zero_grad()
        cls_loss = cls_criterion(y_hat, y_cls_batch)
        adv_loss_fixed = adv_criterion(a_fixed, y_adv_batch, y_cls_batch)
        cls_en_combinedLoss = cls_loss + adv_loss_fixed
        cls_en_combinedLoss.backward()
        opt_cls.step()
        opt_en.step()

        # --- Step 2: update adversary, encoder + classifier fixed ---
        encoder.eval()
        classifier.eval()
        adversary.train()
        # no_grad is correct here: z_fixed/y_hat_fixed are constants w.r.t. the
        # adversary step, so backward() only reaches the adversary's parameters.
        with torch.no_grad():
            z_fixed = encoder(x_batch)
            y_hat_fixed = classifier(z_fixed)
        a_hat = adversary(z_fixed)
        opt_adv.zero_grad()
        cls_loss_fixed = cls_criterion(y_hat_fixed, y_cls_batch)
        adv_loss = adv_criterion(a_hat, y_adv_batch, y_cls_batch)
        # Negated: the adversary *maximises* the combined objective, so we
        # minimise its negative.
        adv_combinedLoss = -(cls_loss_fixed + adv_loss)
        adv_combinedLoss.backward()
        opt_adv.step()

        # --- Bookkeeping (all on CPU / NumPy; .detach() replaces deprecated .data,
        # and .cpu() makes this safe when device is a GPU) ---
        cls_en_combinedLosses.append(cls_en_combinedLoss.item())
        adv_combinedLosses.append(adv_combinedLoss.item())
        cls_preds = torch.round(y_hat.detach()).squeeze(1).cpu().numpy()
        cls_acc = (cls_preds == y_cls_batch.cpu().numpy()).mean()
        cls_en_accs.append(cls_acc)
        adv_preds = torch.round(a_hat.detach()).squeeze(1).cpu().numpy()
        adv_acc = (adv_preds == y_adv_batch.cpu().numpy()).mean()
        adv_accs.append(adv_acc)
    return np.mean(cls_en_combinedLosses), np.mean(cls_en_accs), np.mean(adv_combinedLosses), np.mean(adv_accs)
```