How can I minimize a quadratic function?

How can I minimize a quadratic function? I thought the code below would find the solution x = 2, but it doesn't.

import torch

x = torch.tensor(.0, requires_grad=True)
y = (x-2)**2

# optimizer = torch.optim.SGD([x], lr=0.0001)
optimizer = torch.optim.Adam([x], lr=0.0001)

# initial values
print(x,y)

for i in range(30000):
    optimizer.zero_grad()
    y.backward(retain_graph=True)
    optimizer.step()

    if (i + 1) % 1000 == 0:
        print(i + 1, x, y)

Here is the result.

tensor(0., requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
1000 tensor(0.1000, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
2000 tensor(0.2000, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
3000 tensor(0.3000, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
4000 tensor(0.4000, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
5000 tensor(0.5000, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
6000 tensor(0.6000, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
7000 tensor(0.7000, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
8000 tensor(0.8000, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
9000 tensor(0.9000, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
10000 tensor(1.0001, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
11000 tensor(1.1001, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
12000 tensor(1.2001, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
13000 tensor(1.3001, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
14000 tensor(1.4001, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
15000 tensor(1.5001, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
16000 tensor(1.6002, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
17000 tensor(1.7002, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
18000 tensor(1.8002, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
19000 tensor(1.9002, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
20000 tensor(2.0002, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
21000 tensor(2.1001, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
22000 tensor(2.2000, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
23000 tensor(2.2999, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
24000 tensor(2.3998, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
25000 tensor(2.4997, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
26000 tensor(2.5996, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
27000 tensor(2.6995, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
28000 tensor(2.7994, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
29000 tensor(2.8993, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
30000 tensor(2.9992, requires_grad=True) tensor(4., grad_fn=<PowBackward0>)
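
The printed y never changes and x only drifts by roughly lr per step because y is built once, outside the loop, so every backward() call differentiates the same stale graph: the gradient is always 2*(0 - 2) = -4. A quick way to see this (my own check, not part of the original script) is to print x.grad:

import torch

x = torch.tensor(0.0, requires_grad=True)
y = (x - 2) ** 2                      # graph captured with x = 0
optimizer = torch.optim.Adam([x], lr=0.0001)

for i in range(5):
    optimizer.zero_grad()
    y.backward(retain_graph=True)     # differentiates the same old graph every time
    print(i, x.item(), x.grad.item()) # x.grad stays -4.0 no matter where x is
    optimizer.step()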

The equivalent TensorFlow code would look like the snippet below, and it works.

  
import tensorflow as tf

x = tf.Variable(0.0)
y = (x-2)**2

step = tf.train.AdamOptimizer(0.01).minimize(y)

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer()])
    _x, _y = sess.run([x, y])
    print(0, _x, _y)
    for i in range(10000):
        _, _x, _y = sess.run([step, x, y])
        if (i + 1) % 100 == 0:
            print(i + 1, _x, _y)
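
The TF1 code works because every sess.run([step, x, y]) re-executes the graph, so the gradient is recomputed at the current value of x (it also happens to use a larger learning rate, 0.01 instead of 0.0001). A rough TF2 / eager-mode equivalent, sketched here with tf.keras.optimizers.Adam and tf.GradientTape rather than the original Session API, makes the per-step recomputation explicit:

import tensorflow as tf

x = tf.Variable(0.0)
opt = tf.keras.optimizers.Adam(learning_rate=0.01)

for i in range(10000):
    with tf.GradientTape() as tape:
        y = (x - 2) ** 2              # loss rebuilt every iteration
    grads = tape.gradient(y, [x])
    opt.apply_gradients(zip(grads, [x]))
    if (i + 1) % 1000 == 0:
        print(i + 1, x.numpy(), y.numpy())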
Recomputing y inside the loop, so that the graph is rebuilt and the gradient is evaluated at the current value of x, makes it converge to 2:

for i in range(30000):
    optimizer.zero_grad()
    y = (x-2)**2                      # rebuild the graph each iteration
    y.backward(retain_graph=True)
    optimizer.step()

The tail of the output:

19000 tensor(1.7238) tensor(1.00000e-02 *7.6316)
20000 tensor(1.7971) tensor(1.00000e-02 *4.1186)
21000 tensor(1.8638) tensor(1.00000e-02 *1.8556)
22000 tensor(1.9210) tensor(1.00000e-03 *6.2550)
23000 tensor(1.9642) tensor(1.00000e-03 *1.2822)
24000 tensor(1.9896) tensor(1.00000e-04 *1.0800)
25000 tensor(1.9986) tensor(1.00000e-06 *1.8781)
26000 tensor(2.0000) tensor(1.00000e-09 *2.1949)
27000 tensor(2.0000) tensor(1.00000e-11 *7.3669)
28000 tensor(2.0000) tensor(1.00000e-11 *2.7512)
29000 tensor(2.0000) tensor(1.00000e-12 *9.6065)
30000 tensor(2.0000) tensor(1.00000e-12 *3.6380)
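
For completeness, here is a self-contained corrected version of the PyTorch script; the learning rate of 0.1 is my own choice so it converges in far fewer iterations, and retain_graph is no longer needed because a fresh graph is built each step:

import torch

x = torch.tensor(0.0, requires_grad=True)
optimizer = torch.optim.Adam([x], lr=0.1)

for i in range(1000):
    optimizer.zero_grad()
    y = (x - 2) ** 2                  # rebuild the graph at the current x
    y.backward()                      # fresh graph, so no retain_graph needed
    optimizer.step()
    if (i + 1) % 100 == 0:
        print(i + 1, x.item(), y.item())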
