Problem with using multiple models with one module

Hi~
I’m trying to implement dqn algorithm and there is a problem with making two models with one model class.
Here is my code.

MODEL FILE

import torch as torch
import torch.nn as nn

class Flatten(nn.Module):
    def forward(self, x):
        N, C, H, W = x.size()
        return x.view(N, -1)

class NeuNet(nn.Module):
    def __init__(self, action_num):
        super(NeuNet, self).__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(4, 32, kernel_size=8, stride=4),
            nn.ReLU(inplace=True),

            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(inplace=True),
        )

        self.linear = nn.Sequential(
            nn.Linear(3136, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, action_num)
        )

    def forward(self, x):
        out = self.conv(x)
        N, C, H, W = out.size()
        out = self.linear(out.view(N, -1))
        return out

MAIN FILE

cpu_dtype = tc.FloatTensor
gpu_dtype = tc.cuda.FloatTensor

main_model = model.NeuNet(4)
update_model = model.NeuNet(4)

tc.manual_seed(7)
a = tc.rand(5, 4, 84, 84).type(cpu_dtype)
b = tc.rand(5, 4).type(cpu_dtype)

loss_fn = nn.SmoothL1Loss()
optimizer = tc.optim.RMSprop(main_model.parameters(), lr=0.00025, alpha=0.99, eps=1e-6)

print('before learning')
out = main_model.forward(a)
print('main_model loss :', tc.sum((out - b)**2/20))
out = update_model.forward(a)
print('update_model loss :', tc.sum((out - b)**2/20))


for i in range(2000):
    out = update_model.forward(a)
    cost = loss_fn(out, b)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if i % 100 == 0:
        print('iter :', i)
        print('cost :', cost)


print('\nafter update_model learning')
out = main_model.forward(a)
print('main_model loss :', tc.sum((out - b)**2/20))
out = update_model.forward(a)
print('update_model loss :', tc.sum((out - b)**2/20))



main_model = copy.deepcopy(update_model)

print('\nafter copying up_m to ma_m')
out = main_model.forward(a)
print('main_model loss :', tc.sum((out - b)**2/20))
out = update_model.forward(a)
print('update_model loss :', tc.sum((out - b)**2/20))



print('\nmain predict :', main_model.forward(a))
print('update predict :', update_model.forward(a))
print('label :', b)

As you can see, I made two models like this.

print('before learning')
out = main_model.forward(a)
print('main_model loss :', tc.sum((out - b)**2/20))
out = update_model.forward(a)
print('update_model loss :', tc.sum((out - b)**2/20))

But when I execute the main file, the result comes like this.

before learning
main_model loss : tensor(0.3037, grad_fn=<SumBackward0>)
update_model loss : tensor(0.2928, grad_fn=<SumBackward0>)
iter : 0
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 100
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 200
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 300
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 400
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 500
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 600
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 700
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 800
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 900
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1000
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1100
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1200
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1300
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1400
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1500
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1600
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1700
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1800
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1900
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)

after update_model learning
main_model loss : tensor(0.3037, grad_fn=<SumBackward0>)
update_model loss : tensor(0.2928, grad_fn=<SumBackward0>)

after copying up_m to ma_m
main_model loss : tensor(0.2928, grad_fn=<SumBackward0>)
update_model loss : tensor(0.2928, grad_fn=<SumBackward0>)

main predict : tensor([[ 0.0212,  0.0366,  0.0054, -0.0330],
        [ 0.0189,  0.0316,  0.0087, -0.0346],
        [ 0.0257,  0.0296,  0.0014, -0.0305],
        [ 0.0172,  0.0313,  0.0057, -0.0288],
        [ 0.0251,  0.0353,  0.0046, -0.0318]], grad_fn=<ThAddmmBackward>)
update predict : tensor([[ 0.0212,  0.0366,  0.0054, -0.0330],
        [ 0.0189,  0.0316,  0.0087, -0.0346],
        [ 0.0257,  0.0296,  0.0014, -0.0305],
        [ 0.0172,  0.0313,  0.0057, -0.0288],
        [ 0.0251,  0.0353,  0.0046, -0.0318]], grad_fn=<ThAddmmBackward>)
label : tensor([[0.2898, 0.5942, 0.0620, 0.7024],
        [0.2417, 0.4856, 0.7626, 0.2824],
        [0.9442, 0.3513, 0.2319, 0.2829],
        [0.7488, 0.9602, 0.2700, 0.4687],
        [0.7492, 0.6154, 0.5258, 0.2099]])

Update_model doesn’t learn at all.
How this could happen?

You are not optimizing the correct parameters (main_model instead of update_model)

Oh yeah, I just realized what I had mistaken and just about to erase this posting!!!
Thank you for replying though!!