Hello,

I am working on a model that predicts a normal distribution over a vector: the network outputs a mean vector and a standard-deviation vector, and the observed change in state is then modelled as a diagonal Gaussian. Here is the code:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Normal


def weights_init_(m):
    # initialise the linear layers (a standard Xavier init; assumed here,
    # since the helper itself was not part of this snippet)
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight, gain=1)
        nn.init.constant_(m.bias, 0)


class skilldynamic(nn.Module):
    def __init__(self, state_size, obs_size, fix_var=False, fc1_units=256, fc2_units=256, seed=0):
        """Initialize parameters and build model.
        Params
        ======
            state_size (int): Dimension of each input state
            obs_size (int): Dimension of each predicted vector
            fix_var (bool): If True, use a fixed unit variance instead of a learned one
            fc1_units (int): Number of nodes in first hidden layer
            fc2_units (int): Number of nodes in second hidden layer
            seed (int): Random seed
        """
        super(skilldynamic, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(state_size, fc1_units)
        self.fc2 = nn.Linear(fc1_units, fc2_units)
        self.mean = nn.Linear(fc2_units, obs_size)
        self.std = nn.Linear(fc2_units, obs_size)
        self.apply(weights_init_)
        self._fix_variance = fix_var
        if not self._fix_variance:
            # clip the predicted standard deviation to a sane range
            self._std_lower_clip = 0.3
            self._std_upper_clip = 10.0

    def forward(self, state):
        """Map a state to the mean and standard deviation of the predicted distribution."""
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        mean = self.mean(x)
        if self._fix_variance:
            std = torch.ones_like(mean)
        else:
            # the std head predicts the standard deviation directly, clipped to [0.3, 10]
            std = self.std(x).clamp(self._std_lower_clip, self._std_upper_clip)
        return mean, std

    def reparam(self, mean, std):
        # reparameterisation trick: sample = mean + eps * std with eps ~ N(0, I)
        epsilon = torch.randn_like(std)
        return mean + epsilon * std

    def sample(self, state):
        mean, std = self.forward(state)
        normal = Normal(mean, std)
        x_t = normal.rsample()  # differentiable sample (reparameterisation trick)
        # sum over the vector dimensions to get the joint log-density
        log_prob = normal.log_prob(x_t).sum(-1, keepdim=True)
        return x_t, log_prob, normal
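
For context, this is how I sanity-check the shapes the network produces (the dimension 5 here is made up, just for illustration):

# quick shape check on a random 5-dimensional "state"
dummy_state = torch.randn(5)
net = skilldynamic(state_size=5, obs_size=5)
x_t, log_prob, normal = net.sample(dummy_state)
print(x_t.shape)       # torch.Size([5]) -- one sample of the predicted vector
print(log_prob.shape)  # torch.Size([1]) -- joint log-density of that sample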

class SkillDynamicsModell():
    def __init__(self, state):
        # the model predicts a distribution over vectors of the same size as the state
        self.dynamic_model = skilldynamic(state.shape[0], state.shape[0])
        self.dynamic_optimizer = torch.optim.Adam(self.dynamic_model.parameters(), lr=0.003)

    def predict(self, state):
        return self.dynamic_model.sample(state)

    def learn(self, state, next_state):
        x_t, log_prob, normal = self.dynamic_model.sample(state)
        delta = next_state - state
        self.dynamic_optimizer.zero_grad()
        # negative log-likelihood of the observed state change under the predicted Gaussian
        loss = -normal.log_prob(delta).sum(-1, keepdim=True)
        loss.backward()
        self.dynamic_optimizer.step()
        print("Loss :", loss.cpu().detach().numpy())
state = torch.Tensor(state)            # state and next_state are collected beforehand
next_state = torch.Tensor(next_state)
model = SkillDynamicsModell(state)
print("state", state)
print("next state", next_state)

for i in range(1000):
    model.learn(state, next_state)
Does anyone see what I am doing wrong?