I am using a custom Linear Layer in Pytorch, whose job is to add noise to the network.
I have checked also whether there are in-place operation but everything seems okay
class NoisyLinear(nn.Linear):
def __init__(self, in_features, out_features, sigma_init=0.017, bias=True):
super(NoisyLinear, self).__init__(in_features, out_features, bias=bias)
self.sigma_weight = nn.Parameter(torch.full((out_features, in_features), sigma_init))
self.register_buffer("epsilon_weight", torch.zeros(out_features, in_features))
if bias:
self.sigma_bias = nn.Parameter(torch.full((out_features,), sigma_init))
self.register_buffer("epsilon_bias", torch.zeros(out_features))
self.reset_parameters()
def reset_parameters(self):
std = math.sqrt(3 / self.in_features)
self.weight.data.uniform_(-std, std)
self.bias.data.uniform_(-std, std)
def forward(self, input):
self.epsilon_weight.normal_()
bias = self.bias
if bias is not None:
self.epsilon_bias.normal_()
bias = bias + self.sigma_bias * self.epsilon_bias
return F.linear(input, self.weight + self.sigma_weight * self.epsilon_weight, bias)
the Input size would be 5 and the output would be 256. but it gave me that error.
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [256, 5]] is at version 3; expected version 2 instead.