recently implement the code from the paper shake-shake(https://pdfs.semanticscholar.org/22aa/426aeffb77339646cc03da8e94de22396efc.pdf)
class ShakeNoiseFunction(torch.autograd.Function):
def forward(self, x1,x2):
weight1 = torch.rand(1)
weight2 = 1 - weight1
x1 = x1 * weight1.numpy().tolist()[0]
x2 = x2 * weight2.numpy().tolist()[0]
return x1 + x2
def backward(self, x):
g = x[0]
weight1 = torch.rand(1)
weight2 = 1 - weight1
g1 = g * weight1.numpy().tolist()[0]
g2 = g * weight2.numpy().tolist()[0]
return g1,g2
but throw the error “could not compute gradients for some functions”