So I am trying to build a kind of Actor critic model that uses hierarchical RL, and my problem is that one of my actor network returns an action with shape (1, 512) instead of (1, 6) even though it says that the dimension of the output layer is 6.
I am new to PyTorch and relatively new to RL so I don’t understand everything yet, if someone could help me it would be amazing. If you want more informations just ask.
Thank you in advance.
Actor network :
import os
import torch
import torch.nn as nn
import numpy as np
class ActorNetwork(nn.Module):
def __init__(self, input_size, l1_dims=512, l2_dims=512,
output_size=1, name="actor", checkpoint_dir="../assets/model",
bound=1, offset=0):
super(ActorNetwork, self).__init__()
self.input_size = input_size
self.l1_dims = l1_dims
self.l2_dims = l2_dims
self.output_size = output_size
self.bound = bound
self.offset = offset
self.name = name
self.checkpoint_dir = checkpoint_dir
self.checkpoint_file = os.path.join(self.checkpoint_dir,
self.name)
self.l1 = nn.Linear(self.input_size, self.l1_dims)
self.relu1 = nn.ReLU()
self.l2 = nn.Linear(self.l1_dims, self.l2_dims)
self.relu2 = nn.ReLU()
self.l3 = nn.Linear(self.l2_dims, self.output_size)
self.tanh = nn.Tanh()
def call(self, state, goal):
x = torch.cat([state, goal], dim=1)
linear1 = self.l1(x)
relu1 = self.relu1(linear1)
linear2 = self.l2(relu1)
relu2 = self.relu2(linear2)
tanh = self.tanh(relu2)
return (tanh.detach().numpy() * self.bound) + self.offset