Would you please help me.
when I run the code below I get similar values for the loss function. So, there is not training in the network.
there is no update through the network parameters. Could please help me what is the wrong in the implementation?
i checked the other similar topics and tried them but still it does not train.
the code is here:
class AgentRNN(nn.Module):
def __init__(self, cfg):
super(AgentRNN, self).__init__()
self.lr = cfg.rnn_lr
self.hidden_units = cfg.hidden_units
self.tou = cfg.tou
self.num_obs = cfg.num_obs
self.output_size = cfg.num_output
# self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.i2h = nn.Linear(self.num_obs , self.hidden_units) #self.num_obs + self.hidden_units
self.i2h.double()
self.h2h = nn.Linear(self.hidden_units, self.hidden_units)
self.h2h.double()
self.i2o = nn.Linear(self.hidden_units, self.output_size)
self.i2o.double()
# self.softmax = nn.Softmax()
self.gru = nn.GRU(input_size=self.num_obs, hidden_size=self.hidden_units)
self.gru.double()
self.linear = nn.Linear(self.hidden_units, self.output_size)
self.linear.double()
self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
self.criterion = nn.MSELoss() #nn.CrossEntropyLoss() # nn.MSELoss()
self.init_weight()
self.batch_size = cfg.batch_size
self.summary_writer = cfg.summary_writer
self.step = 0
self.horizon = cfg.horizon
self.h_states = []
self.batch_h, self.batch_obs = [], []
self.output_batch = []
self.obs_sequence = None
self.outputs = []
self.losses = []
self.h = None
self.environment = cfg.env_instance
def start(self):
self.obs= self.environment.start()
self.batch_obs.append(self.obs)
self.output = self.environment.get_output()
self.output_batch.append(self.output)
self.environment.get_position()
self.step += 1
self.obs_sequence = self.obs #self.obs_sequence = np.array([np.asscalar(self.obs)])
h_t = torch.zeros(1, self.hidden_units, dtype=torch.double, requires_grad=True)
input = torch.from_numpy(self.obs) #torch.tensor(self.obs) #input = torch.tensor(np.expand_dims(self.obs_sequence, axis=0))
outs, hids = self._forward(input, h_t)
self.h = torch.tensor(hids, requires_grad=False) #hids[0]
prediction = outs #outs[-1]
self.outputs.append(prediction)
return prediction
def update(self):
self.obs = self.environment.step()
self.output = self.environment.get_output()
self.output_batch.append(self.output)
self.environment.get_position()
self.step += 1
self.obs_sequence = self.obs #np.append(self.obs_sequence, np.array(self.obs)) #self.obs_sequence = np.append(self.obs_sequence, np.asscalar(self.obs))
input = torch.tensor(self.obs) #torch.tensor(np.expand_dims(self.obs_sequence[-self.tou:], axis=0)) #input = torch.tensor(np.expand_dims(self.obs_sequence[-self.tou:], axis=0))
outs, hids = self._forward(input, self.h)
self.h = torch.tensor(hids, requires_grad=False) # hids[0]
prediction = outs # outs[-1]
self.outputs.append(prediction)
if self.step <= self.horizon:
return prediction.item()
target = []
for i in range(np.shape(self.output)[0]):
if self.output[i] != 0:
target.append(i)
target = torch.tensor(np.float64(target), requires_grad=True)
target = torch.autograd.variable(target.long())
outs = torch.tensor(outs, requires_grad=True)
self.output = torch.tensor(np.double(self.output)) #
loss = self.criterion(outs.unsqueeze(0), self.output.unsqueeze(0)) #loss = self.criterion(self.outputs[-(self.horizon + 1)], torch.tensor(np.asscalar(self.obs), dtype=torch.double))
loss.backward()
self.losses.append(loss.item())
if self.step % self.batch_size == 0:
# torch.nn.utils.clip_grad_norm_(self.parameters(), 2)
self.optimizer.step()
self.optimizer.zero_grad()
return prediction, loss.item()
def set_seed(self, seed):
pass
def log_loss(self, loss):
self.summary_writer.add_scalar('loss/step/model_loss', loss, self.step)
def _forward(self, inputs, h_t=None):
inputs = inputs.squeeze(0).unsqueeze(0) #inputs.squeeze(0).unsqueeze(1).unsqueeze(1)
if h_t is None:
h_t = torch.zeros(1, self.hidden_units, dtype=torch.double)
inputs = inputs.double()
combined = torch.cat((inputs, h_t), 1)
hidden = self.i2h(inputs) # nn.relu()
# hidden = F.relu(self.h2h(hidden1))
output = (self.i2o(hidden)).squeeze(0)
output1 = F.softmax(output)
return output, hidden
def init_weight(self):
for m in self.modules():
if isinstance(m, nn.Linear):
torch.nn.init.xavier_uniform_(m.weight.data)
m.bias.data.fill_(0)