Hello,

I am working on a model that predicts a normal distribution over a vector: the network outputs a mean vector and a standard-deviation vector, and the observed change in state is then modelled as a diagonal Gaussian. Here is the code:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Normal


def weights_init_(m):
    # initialise the linear layers (a standard Xavier init; assumed here,
    # since the helper itself was not part of this snippet)
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight, gain=1)
        nn.init.constant_(m.bias, 0)


class skilldynamic(nn.Module):
    def __init__(self, state_size, obs_size, fix_var=False, fc1_units=256, fc2_units=256, seed=0):
        """Initialize parameters and build model.
        Params
        ======
            state_size (int): Dimension of each input state
            obs_size (int): Dimension of each predicted vector
            fix_var (bool): If True, use a fixed unit variance instead of a learned one
            fc1_units (int): Number of nodes in first hidden layer
            fc2_units (int): Number of nodes in second hidden layer
            seed (int): Random seed
        """
        super(skilldynamic, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(state_size, fc1_units)
        self.fc2 = nn.Linear(fc1_units, fc2_units)
        self.mean = nn.Linear(fc2_units, obs_size)
        self.std = nn.Linear(fc2_units, obs_size)
        self.apply(weights_init_)
        self._fix_variance = fix_var
        if not self._fix_variance:
            # clip the predicted standard deviation to a sane range
            self._std_lower_clip = 0.3
            self._std_upper_clip = 10.0

    def forward(self, state):
        """Map a state to the mean and standard deviation of the predicted distribution."""
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        mean = self.mean(x)
        if self._fix_variance:
            std = torch.ones_like(mean)
        else:
            # the std head predicts the standard deviation directly, clipped to [0.3, 10]
            std = self.std(x).clamp(self._std_lower_clip, self._std_upper_clip)
        return mean, std

    def reparam(self, mean, std):
        # reparameterisation trick: sample = mean + eps * std with eps ~ N(0, I)
        epsilon = torch.randn_like(std)
        return mean + epsilon * std

    def sample(self, state):
        mean, std = self.forward(state)
        normal = Normal(mean, std)
        x_t = normal.rsample()  # differentiable sample (reparameterisation trick)
        # sum over the vector dimensions to get the joint log-density
        log_prob = normal.log_prob(x_t).sum(-1, keepdim=True)
        return x_t, log_prob, normal
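
For context, this is how I sanity-check the shapes the network produces (the dimension 5 here is made up, just for illustration):

# quick shape check on a random 5-dimensional "state"
dummy_state = torch.randn(5)
net = skilldynamic(state_size=5, obs_size=5)
x_t, log_prob, normal = net.sample(dummy_state)
print(x_t.shape)       # torch.Size([5]) -- one sample of the predicted vector
print(log_prob.shape)  # torch.Size([1]) -- joint log-density of that sample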

class SkillDynamicsModell():
    def __init__(self, state):
        # the model predicts a distribution over vectors of the same size as the state
        self.dynamic_model = skilldynamic(state.shape[0], state.shape[0])
        self.dynamic_optimizer = torch.optim.Adam(self.dynamic_model.parameters(), lr=0.003)

    def predict(self, state):
        return self.dynamic_model.sample(state)

    def learn(self, state, next_state):
        x_t, log_prob, normal = self.dynamic_model.sample(state)
        delta = next_state - state
        self.dynamic_optimizer.zero_grad()
        # negative log-likelihood of the observed state change under the predicted Gaussian
        loss = -normal.log_prob(delta).sum(-1, keepdim=True)
        loss.backward()
        self.dynamic_optimizer.step()
        print("Loss :", loss.cpu().detach().numpy())
state = torch.Tensor(state)            # state and next_state are collected beforehand
next_state = torch.Tensor(next_state)
model = SkillDynamicsModell(state)
print("state", state)
print("next state", next_state)

for i in range(1000):
    model.learn(state, next_state)
Does anyone see what I am doing wrong?