None grad even though I set requires_grad=True

Would you please help me?
When I run the code below, the loss stays at roughly the same value, so the network is not training: the parameters are never updated. Could you please tell me what is wrong with the implementation?
I checked the other similar topics and tried their suggestions, but it still does not train.
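
One way to confirm that the parameters really are frozen is to snapshot them across an update; a minimal sketch, where `agent` is a placeholder for an instance of the class below:

import torch

before = [p.detach().clone() for p in agent.parameters()]
agent.update()  # may or may not reach optimizer.step(), depending on batch_size
unchanged = all(torch.equal(b, p.detach()) for b, p in zip(before, agent.parameters()))
print("parameters unchanged:", unchanged)  # prints True here, i.e. no learning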

Here is the code:

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class AgentRNN(nn.Module):
    def __init__(self, cfg):
        super(AgentRNN, self).__init__()
        self.lr = cfg.rnn_lr
        self.hidden_units = cfg.hidden_units
        self.tou = cfg.tou
        self.num_obs = cfg.num_obs
        self.output_size = cfg.num_output
        # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.i2h = nn.Linear(self.num_obs, self.hidden_units)  # self.num_obs + self.hidden_units
        self.i2h.double()
        self.h2h = nn.Linear(self.hidden_units, self.hidden_units)
        self.h2h.double()
        self.i2o = nn.Linear(self.hidden_units, self.output_size)
        self.i2o.double()
        # self.softmax = nn.Softmax()


        # Note: this GRU branch is defined (and cast to double) but never used
        # in _forward below; only the i2h/i2o layers are exercised.
        self.gru = nn.GRU(input_size=self.num_obs, hidden_size=self.hidden_units)
        self.gru.double()
        self.linear = nn.Linear(self.hidden_units, self.output_size)
        self.linear.double()

        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
        self.criterion = nn.MSELoss()  # nn.CrossEntropyLoss()
        self.init_weight()
        self.batch_size = cfg.batch_size

        self.summary_writer = cfg.summary_writer
        self.step = 0

        self.horizon = cfg.horizon
        self.h_states = []
        self.batch_h, self.batch_obs = [], []
        self.output_batch = []

        self.obs_sequence = None
        self.outputs = []
        self.losses = []
        self.h = None

        self.environment = cfg.env_instance


    def start(self):
        self.obs = self.environment.start()
        self.batch_obs.append(self.obs)
        self.output = self.environment.get_output()
        self.output_batch.append(self.output)
        self.environment.get_position()

        self.step += 1
        self.obs_sequence = self.obs #self.obs_sequence = np.array([np.asscalar(self.obs)])
        h_t = torch.zeros(1, self.hidden_units, dtype=torch.double, requires_grad=True)
        input = torch.from_numpy(self.obs)  # torch.tensor(np.expand_dims(self.obs_sequence, axis=0))
        outs, hids = self._forward(input, h_t)

        self.h = torch.tensor(hids, requires_grad=False) #hids[0]
        prediction = outs #outs[-1]
        self.outputs.append(prediction)

        return prediction

    def update(self):
        self.obs = self.environment.step()
        self.output = self.environment.get_output()
        self.output_batch.append(self.output)
        self.environment.get_position()

        self.step += 1

        self.obs_sequence = self.obs  # np.append(self.obs_sequence, np.asscalar(self.obs))

        input = torch.tensor(self.obs)  # torch.tensor(np.expand_dims(self.obs_sequence[-self.tou:], axis=0))
        outs, hids = self._forward(input, self.h)

        self.h = torch.tensor(hids, requires_grad=False) # hids[0]
        prediction = outs # outs[-1]
        self.outputs.append(prediction)

        if self.step <= self.horizon:
            return prediction.item()  # note: .item() only works when the prediction is a single element

        # Indices of the non-zero outputs. Note: `target` is built here but
        # never used; the MSE loss below compares against self.output instead.
        target = []
        for i in range(np.shape(self.output)[0]):
            if self.output[i] != 0:
                target.append(i)

        target = torch.tensor(target, dtype=torch.double, requires_grad=True)
        target = torch.autograd.Variable(target.long())

        outs = torch.tensor(outs, requires_grad=True)
        self.output = torch.tensor(self.output, dtype=torch.double)
        loss = self.criterion(outs.unsqueeze(0), self.output.unsqueeze(0))
        loss.backward()

        self.losses.append(loss.item())

        if self.step % self.batch_size == 0:
            # torch.nn.utils.clip_grad_norm_(self.parameters(), 2)
            self.optimizer.step()
            self.optimizer.zero_grad()

        return prediction, loss.item()

    def set_seed(self, seed):
        pass

    def log_loss(self, loss):
        self.summary_writer.add_scalar('loss/step/model_loss', loss, self.step)

    def _forward(self, inputs, h_t=None):
        inputs = inputs.squeeze(0).unsqueeze(0) #inputs.squeeze(0).unsqueeze(1).unsqueeze(1)
        if h_t is None:
            h_t = torch.zeros(1, self.hidden_units, dtype=torch.double)

        inputs = inputs.double()

        combined = torch.cat((inputs, h_t), 1)  # computed but never used below
        hidden = self.i2h(inputs)
        # hidden = F.relu(self.h2h(hidden))
        output = self.i2o(hidden).squeeze(0)
        output1 = F.softmax(output, dim=-1)  # also unused: the raw output is returned

        return output, hidden

    def init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight.data)
                m.bias.data.fill_(0)

It’s likely because it’s not a leaf variable.
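
A minimal, self-contained example of that point: .grad is only populated for leaf tensors (such as module parameters), so the output of a layer reports None even though it requires grad:

import torch
import torch.nn as nn

layer = nn.Linear(2, 1)
out = layer(torch.ones(1, 2))   # out.requires_grad is True, but out is NOT a leaf
out.sum().backward()

print(out.grad)            # None (with a warning): .grad is kept only for leaves
print(layer.weight.grad)   # a real gradient: the leaf parameters did receive one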

No, it is a leaf variable. But what I meant is that the network does not learn; that is my problem, and I suspect it is related to the grad values. Could you please help me figure out why this network does not train?
I also checked all the previous topics you mentioned; they did not help.

Thank you
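
For anyone landing here with the same symptom: the likely culprit is the re-wrapping of hids and outs with torch.tensor(...) inside update(). torch.tensor(existing_tensor, ...) builds a brand-new leaf tensor that is cut off from the computation graph, so loss.backward() only produces a gradient for that new leaf and never reaches the network parameters (their .grad stays None and the optimizer has nothing to apply). A sketch of update() with the graph kept intact; everything not shown stays as in the original:

    def update(self):
        self.obs = self.environment.step()
        self.output = self.environment.get_output()
        self.step += 1

        input = torch.from_numpy(np.asarray(self.obs))
        outs, hids = self._forward(input, self.h)

        # Detach the carried hidden state so the graph does not grow across
        # steps -- but via .detach(), not a torch.tensor(...) rebuild.
        self.h = hids.detach()
        self.outputs.append(outs)

        if self.step <= self.horizon:
            return outs

        # Use outs exactly as the network produced it; re-wrapping it with
        # torch.tensor(outs, requires_grad=True) cuts it out of the graph.
        target = torch.as_tensor(self.output, dtype=torch.double)
        loss = self.criterion(outs.unsqueeze(0), target.unsqueeze(0))
        loss.backward()
        self.losses.append(loss.item())

        if self.step % self.batch_size == 0:
            self.optimizer.step()
            self.optimizer.zero_grad()

        return outs, loss.item()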