DQN saved model doesn't play correctly

I have the following DQN model:

import numpy as np
import torch
import torch.nn as nn


class DQN(nn.Module):

    def __init__(self, input_shape, n_actions):
        super(DQN, self).__init__()

        # convolutional feature extractor (the standard Nature DQN stack)
        self.conv = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU()
        )

        conv_out_size = self._get_conv_out(input_shape)
        # fully connected head mapping the flattened features to one Q-value per action
        self.fc = nn.Sequential(
            nn.Linear(conv_out_size, 512),
            nn.ReLU(),
            nn.Linear(512, n_actions)
        )

    def _get_conv_out(self, shape):
        # run a dummy forward pass to compute the flattened conv output size
        o = self.conv(torch.zeros(1, *shape))
        return int(np.prod(o.size()))

    def forward(self, x):
        fx = x.float() / 256.0  # scale byte pixels to roughly [0, 1)
        conv_out = self.conv(fx).view(fx.size()[0], -1)
        return self.fc(conv_out)
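
As a quick sanity check, the network can be probed with a dummy batch (assuming the usual Atari input of 4 stacked 84x84 frames and Pong's 6 actions):

net = DQN((4, 84, 84), 6)
q_values = net(torch.zeros(1, 4, 84, 84))
print(q_values.shape)  # torch.Size([1, 6]) - one Q-value per action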

I trained it on Pong and got a mean reward above 18.0 over the last 100 games.
I saved it with:

torch.save(net.state_dict(), 'pong_model.pt')

But when I load it and play some games, I get wrong results:

net = models.DQN(env.observation_space.shape, env.action_space.n)
# map_location loads the saved weights onto the CPU regardless of where they were trained
net.load_state_dict(torch.load('pong_model.pt', map_location=lambda storage, loc: storage))
for i in range(20):
    state = env.reset()
    while True:
        env.render()
        action = agent(state, net)
        next_state, _, done, _ = env.step(action)
        if done:
            break
        state = next_state

It can’t win even once. What could be wrong?

Can you post the code for agent()?

import random

import numpy as np
import torch


class EpsilonGreedyAgent(object):

    def __init__(self, actions_num):
        self.actions_num = actions_num

    def _get_state_value(self, state, model):
        state = np.expand_dims(state, 0)  # add a batch dimension
        state = torch.from_numpy(state)
        if use_cuda:  # use_cuda is a global flag set elsewhere in the script
            state = state.cuda()
        with torch.no_grad():  # inference only, no gradients needed
            return model(state).cpu().numpy()

    def __call__(self, state, model, eps=0.0):
        state_value = self._get_state_value(state, model)
        sample = random.random()

        if sample > eps:
            action = np.argmax(state_value)  # exploit: pick the highest Q-value
        else:
            action = random.randrange(self.actions_num)  # explore: random action
        return action
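
At evaluation time it can be called fully greedily, for example:

agent = EpsilonGreedyAgent(env.action_space.n)
action = agent(state, net, eps=0.0)  # eps=0.0 disables random exploration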

I found my mistake: I wasn't applying all of the environment wrappers when testing the model, so the network was fed raw observations instead of the preprocessed ones it was trained on.
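
For anyone hitting the same problem: the evaluation environment has to be built with exactly the same wrapper stack as the training one. Here is a minimal sketch using gym's built-in Atari wrappers (this assumes training used the usual 84x84 grayscale, 4-frame-stack preprocessing; substitute whatever wrapper function your own training script used):

import gym

def make_eval_env(env_name='PongNoFrameskip-v4'):
    env = gym.make(env_name)
    # AtariPreprocessing expects a NoFrameskip env; it does the frame skipping itself
    env = gym.wrappers.AtariPreprocessing(env, frame_skip=4, screen_size=84,
                                          grayscale_obs=True, scale_obs=False)
    # stack the last 4 frames so observations have shape (4, 84, 84)
    env = gym.wrappers.FrameStack(env, num_stack=4)
    return env

env = make_eval_env()
net = models.DQN(env.observation_space.shape, env.action_space.n)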

Hi,

I have trained a model for a robotic arm in a Gazebo simulation. But when I load that model, I can see all the trained weights in place, yet the model doesn't win a single time. How should I wrap the Gazebo environment? Any suggestions?