I wanted to implement a weird regularizer that is trained with SGD as follow:

```
R(w) = sum_d exp( -|w_d-t|^2/2sigma) * w_d**2
```

but I do it I get that I am doing backward twice even thought Im not. Why is that?

Implementation of the regularizer:

```
''' Data set '''
a = Variable(torch.FloatTensor(wavelengths), requires_grad=False)
X_train, Y_train = Variable(torch.FloatTensor(A1),requires_grad=False) , Variable(torch.FloatTensor(y_real.reshape(N,1)),requires_grad=False)
## reg params
reg_l = 1
A_param = Variable(torch.FloatTensor([A]), requires_grad=False)
sigma_param = Variable(torch.FloatTensor([sigma]), requires_grad=False)
t_param = Variable(torch.FloatTensor([center]), requires_grad=False)
def get_reg(x, a,A_param,t_param,sigma_param):
D = len(a)
R_x = A_param*torch.exp(-(a - t_param)**2/sigma_param**2)
R_x = 1/R_x
R_x = R_x.view(1,D)
x_2 = (x**2).t()
R_x = R_x.mm(x_2)
return R_x
```

but then SGD training:

```
def train_SGD(mdl, M,eta,nb_iter, dtype, X_train,Y_train, reg_l,R_x):
##
#pdb.set_trace()
N_train,_ = tuple( X_train.size() )
#print(N_train)
for i in range(1,nb_iter+1):
# Forward pass: compute predicted Y using operations on Variables
batch_xs, batch_ys = get_batch2(X_train,Y_train,M,dtype) # [M, D], [M, 1]
## FORWARD PASS
y_pred = mdl(batch_xs)
## Check vectors have same dimension
if vectors_dims_dont_match(batch_ys,y_pred):
pdb.set_trace()
raise ValueError('You vectors don\'t have matching dimensions. It will lead to errors.')
## LOSS + Regularization
if R_x is None:
batch_loss = (1/M)*(y_pred - batch_ys).pow(2).sum()
else:
batch_loss = (1/M)*(y_pred - batch_ys).pow(2).sum()
batch_loss = batch_loss + reg_l*R_x
## BACKARD PASS
batch_loss.backward() # Use autograd to compute the backward pass. Now w will have gradients
## SGD update
for W in mdl.parameters():
delta = eta*W.grad.data
W.data.copy_(W.data - delta)
## train stats
if i % (nb_iter/10) == 0 or i == 0:
#X_train_, Y_train_ = Variable(X_train), Variable(Y_train)
X_train_, Y_train_ = X_train, Y_train
current_train_loss = (1/N_train)*(mdl.forward(X_train_) - Y_train_).pow(2).sum().data.numpy()
print('\n-------------')
print('i = ',i)
print('current_train_loss = ',current_train_loss)
# print(f'eta*W.grad.data = {eta*W.grad.data}')
# print(f'W.grad.data = {W.grad.data}')
## Manually zero the gradients after updating weights
mdl.zero_grad()
final_sgd_error = current_train_loss
return final_sgd_error
```

I only ever call `.backward`

once though…