# Simple test, I really didn't get how autograd calculates these grads

import torch
import torch.nn as nn

dtype = torch.float
device = torch.device("cpu")  # was typographic quotes (“cpu”) -> SyntaxError

# N: batch size; D_in: input dim; H: hidden dim; D_out: output dim.
# Everything is 1x1 so each tensor holds a single scalar.
N, D_in, H, D_out = 1, 1, 1, 1

# Input tensor: no grad needed, overwritten with one large constant value
# so the tanh in the forward pass saturates.
x = torch.randn(N, D_in, device=device, dtype=dtype)
x[0, 0] = 1000

# Trainable weights for the first network. The original used
# nn.init.uniform_(w, a, a), which is just a constant fill with a;
# nn.init.constant_ says that directly and produces identical values.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
nn.init.constant_(w1, 0.5)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)
nn.init.constant_(w2, 1)
w3 = torch.randn(H, 1, device=device, dtype=dtype, requires_grad=True)
nn.init.constant_(w3, 1)

# Trainable weights for the second (comparison) network, all set to 1.
w1_ = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
nn.init.constant_(w1_, 1)
w2_ = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)
nn.init.constant_(w2_, 1)
w3_ = torch.randn(H, 1, device=device, dtype=dtype, requires_grad=True)
nn.init.constant_(w3_, 1)

learning_rate = 1e-6

for t in range(500):
    # Forward pass, network 1: tanh hidden layer plus a linear skip term.
    # (Stray ``` backticks and the lost loop-body indentation from the
    # original paste are repaired here.)
    p = torch.tanh(x.mm(w1))
    y_pred = p.mm(w2)
    y_pred = y_pred - y_pred.mean()      # center the prediction
    y_pred = y_pred + x.mm(w1).mm(w3)    # linear skip connection
    # BUG FIX: Tensor.max(dim, keepdim=True) returns a (values, indices)
    # tuple; gather requires the *indices* tensor, so take element [1].
    idx = y_pred.max(1, keepdim=True)[1]
    y0 = y_pred.gather(1, idx)

    # Forward pass, network 2: same architecture with the w*_ weights.
    y_pred_ = torch.tanh(x.mm(w1_)).mm(w2_)
    y_pred_ = y_pred_ - y_pred_.mean()
    y_pred_ = y_pred_ + x.mm(w1_).mm(w3_)
    idx_ = y_pred_.max(1, keepdim=True)[1]  # same values-vs-indices fix
    y1 = y_pred_.gather(1, idx_)

    # Squared-error loss between the two selected (argmax) outputs.
    loss = (y0 - y1).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Backward pass: autograd populates w*.grad with d(loss)/d(w*).
    # NOTE(review): grads accumulate across iterations unless zeroed;
    # the weight update + zeroing presumably follows below this view
    # (the source is truncated here) — confirm it runs inside the loop.
    loss.backward()

    # Manually zero the gradients after updating weights