Hi~
I’m trying to implement dqn algorithm and there is a problem with making two models with one model class.
Here is my code.
MODEL FILE
import torch as torch
import torch.nn as nn
class Flatten(nn.Module):
def forward(self, x):
N, C, H, W = x.size()
return x.view(N, -1)
class NeuNet(nn.Module):
def __init__(self, action_num):
super(NeuNet, self).__init__()
self.conv = nn.Sequential(
nn.Conv2d(4, 32, kernel_size=8, stride=4),
nn.ReLU(inplace=True),
nn.Conv2d(32, 64, kernel_size=4, stride=2),
nn.ReLU(inplace=True),
nn.Conv2d(64, 64, kernel_size=3, stride=1),
nn.ReLU(inplace=True),
)
self.linear = nn.Sequential(
nn.Linear(3136, 512),
nn.ReLU(inplace=True),
nn.Linear(512, action_num)
)
def forward(self, x):
out = self.conv(x)
N, C, H, W = out.size()
out = self.linear(out.view(N, -1))
return out
MAIN FILE
cpu_dtype = tc.FloatTensor
gpu_dtype = tc.cuda.FloatTensor
main_model = model.NeuNet(4)
update_model = model.NeuNet(4)
tc.manual_seed(7)
a = tc.rand(5, 4, 84, 84).type(cpu_dtype)
b = tc.rand(5, 4).type(cpu_dtype)
loss_fn = nn.SmoothL1Loss()
optimizer = tc.optim.RMSprop(main_model.parameters(), lr=0.00025, alpha=0.99, eps=1e-6)
print('before learning')
out = main_model.forward(a)
print('main_model loss :', tc.sum((out - b)**2/20))
out = update_model.forward(a)
print('update_model loss :', tc.sum((out - b)**2/20))
for i in range(2000):
out = update_model.forward(a)
cost = loss_fn(out, b)
optimizer.zero_grad()
cost.backward()
optimizer.step()
if i % 100 == 0:
print('iter :', i)
print('cost :', cost)
print('\nafter update_model learning')
out = main_model.forward(a)
print('main_model loss :', tc.sum((out - b)**2/20))
out = update_model.forward(a)
print('update_model loss :', tc.sum((out - b)**2/20))
main_model = copy.deepcopy(update_model)
print('\nafter copying up_m to ma_m')
out = main_model.forward(a)
print('main_model loss :', tc.sum((out - b)**2/20))
out = update_model.forward(a)
print('update_model loss :', tc.sum((out - b)**2/20))
print('\nmain predict :', main_model.forward(a))
print('update predict :', update_model.forward(a))
print('label :', b)
As you can see, I made two models like this.
print('before learning')
out = main_model.forward(a)
print('main_model loss :', tc.sum((out - b)**2/20))
out = update_model.forward(a)
print('update_model loss :', tc.sum((out - b)**2/20))
But when I execute the main file, the result comes like this.
before learning
main_model loss : tensor(0.3037, grad_fn=<SumBackward0>)
update_model loss : tensor(0.2928, grad_fn=<SumBackward0>)
iter : 0
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 100
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 200
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 300
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 400
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 500
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 600
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 700
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 800
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 900
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1000
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1100
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1200
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1300
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1400
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1500
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1600
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1700
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1800
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
iter : 1900
cost : tensor(0.1464, grad_fn=<SmoothL1LossBackward>)
after update_model learning
main_model loss : tensor(0.3037, grad_fn=<SumBackward0>)
update_model loss : tensor(0.2928, grad_fn=<SumBackward0>)
after copying up_m to ma_m
main_model loss : tensor(0.2928, grad_fn=<SumBackward0>)
update_model loss : tensor(0.2928, grad_fn=<SumBackward0>)
main predict : tensor([[ 0.0212, 0.0366, 0.0054, -0.0330],
[ 0.0189, 0.0316, 0.0087, -0.0346],
[ 0.0257, 0.0296, 0.0014, -0.0305],
[ 0.0172, 0.0313, 0.0057, -0.0288],
[ 0.0251, 0.0353, 0.0046, -0.0318]], grad_fn=<ThAddmmBackward>)
update predict : tensor([[ 0.0212, 0.0366, 0.0054, -0.0330],
[ 0.0189, 0.0316, 0.0087, -0.0346],
[ 0.0257, 0.0296, 0.0014, -0.0305],
[ 0.0172, 0.0313, 0.0057, -0.0288],
[ 0.0251, 0.0353, 0.0046, -0.0318]], grad_fn=<ThAddmmBackward>)
label : tensor([[0.2898, 0.5942, 0.0620, 0.7024],
[0.2417, 0.4856, 0.7626, 0.2824],
[0.9442, 0.3513, 0.2319, 0.2829],
[0.7488, 0.9602, 0.2700, 0.4687],
[0.7492, 0.6154, 0.5258, 0.2099]])
Update_model doesn’t learn at all.
How this could happen?