# Observed error (before fix): RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [32, 768]], which is output 0 of TanhBackward, is at version 2; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
def forward(self, input, labels):
    """Run BERT, apply per-class mixup on the pooled features, and classify.

    For each label class (0 and 1), the pooled features of that class are
    linearly interpolated with a shuffled permutation of the same class:
    ``lam * x + (1 - lam) * x_shuffled`` with ``lam ~ Beta(0.5, 0.5)``.

    Args:
        input: dict of keyword arguments forwarded to ``self.bert``
            (e.g. ``input_ids``, ``attention_mask``).
        labels: 1-D tensor of 0/1 class labels, one per batch row.

    Returns:
        Tuple ``(data, logits, labels)`` where ``data = self.head(mixed)``
        and ``logits = self.fc(data)``.

    Note:
        The original version assigned into ``result`` in place. ``result``
        is the output of the pooler's Tanh, which autograd must keep at
        version 0, so backward raised
        "modified by an inplace operation ... output 0 of TanhBackward".
        Writing into a ``clone()`` instead keeps the Tanh output intact,
        and mixing with the live (non-detached) ``result`` lets gradients
        flow through both mixup operands.
    """
    result = self.bert(**input).pooler_output
    device = result.device  # avoid depending on a module-level `device` global
    alpha = 0.5
    lam = float(np.random.beta(alpha, alpha))

    # clone() is differentiable; index-writes into the clone do not touch
    # the Tanh output that autograd tracks, so backward works.
    mixed = result.clone()

    def _mix_group(indices):
        """Interpolate rows `indices` of `mixed` with a shuffled permutation."""
        idx = torch.tensor(indices, device=device)
        shuffled = list(indices)  # shallow copy is enough for a list of ints
        random.shuffle(shuffled)
        idx_shuffled = torch.tensor(shuffled, device=device)
        # Read from the untouched `result` snapshot so both groups mix
        # against the original (pre-mix) features.
        mixed[idx] = lam * result[idx] + (1 - lam) * result[idx_shuffled]

    for cls in (0, 1):
        group = [i for i, dat in enumerate(labels) if dat.item() == cls]
        if group:
            _mix_group(group)

    data = self.head(mixed)
    return data, self.fc(data), labels