The value of grad is None when I load the aggregated parameters
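For reference, the symptom can be checked by printing each parameter's `.grad` after `backward()` runs (a sketch; `agent` and `global_parameters` are placeholders for the objects set up in the code below):

agent = Agent1()
# ... after filling agent.alpha_memory with transitions ...
agent.local_learn(localEpoch=1, localBatchSize=32, global_parameters=global_parameters)
for name, p in agent.Eval_Net.named_parameters():
    print(name, p.grad)  # prints None for every parameter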

import torch
import torch.nn as nn
import numpy as np
from Model import rl_net
from torchviz import make_dot

import matplotlib.pyplot as plt

# Hyper Parameters

TARGET_REPLACE_ITER = 100
num_action = 3
Memory_capacity = 500
LR = 0.001
EPSILON = 0.9
BATCH_SIZE = 32
GAMMA = 0.95
num_alpha_state = 12
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class Agent1(object):
    def __init__(self):
        self.Eval_Net = rl_net().to(device)
        self.Target_Net = rl_net().to(device)
        self.memory_counter = 0
        self.alpha_memory = torch.from_numpy(np.zeros((Memory_capacity, 26))).to(device=device)
        self.learn_step_counter_alpha = 0
        self.learn_alpha_step_counter = 0
        self.rl_net = None
    def local_learn(self, localEpoch, localBatchSize, global_parameters):
        self.Eval_Net.load_state_dict(global_parameters, strict=True)
        loss_func = nn.MSELoss()
        opti = torch.optim.Adam(self.Eval_Net.parameters(), lr=LR)
        for epoch in range(localEpoch):
            self.learn_step_counter_alpha += 1
            if self.learn_step_counter_alpha % TARGET_REPLACE_ITER == 0:
                self.Target_Net.load_state_dict(self.Eval_Net.state_dict())
            sample_index = torch.tensor(np.random.choice(Memory_capacity, localBatchSize), device=device).long()
            b_alpha_memory = torch.tensor(self.alpha_memory[sample_index, :], device=device)
            b_alpha_s = torch.tensor(b_alpha_memory[:, :num_alpha_state]).float()
            b_alpha_a = torch.tensor(b_alpha_memory[:, num_alpha_state:num_alpha_state + 1]).long()
            b_alpha_r = torch.tensor(b_alpha_memory[:, num_alpha_state + 1:num_alpha_state + 2])
            b_alpha_s_ = torch.tensor(b_alpha_memory[:, -num_alpha_state:]).float()
            q_alpha_eval = self.Eval_Net.forward(b_alpha_s).gather(1, b_alpha_a)  # shape (batch, 1): Q-value of the action taken
            q_alpha_next = self.Target_Net.forward(b_alpha_s_).detach()  # detach from graph, don't backpropagate
            q_alpha_target = b_alpha_r + GAMMA * q_alpha_next.max(1)[0].view(BATCH_SIZE, 1)  # shape (batch, 1)
            loss_alpha = torch.tensor(loss_func(q_alpha_eval, q_alpha_target), dtype=float, device=device, requires_grad=True)
            opti.zero_grad()
            loss_alpha.backward(retain_graph=True)
            make_dot(self.Eval_Net(torch.rand(12)))
        self.rl_net = self.Target_Net
    def store_alpha_transition(self, s, a, r, s_):  # reusable
        s = torch.tensor([s]).to(device=device)
        a = torch.tensor([a]).to(device=device)
        r = torch.tensor([r]).to(device=device)
        s_ = torch.tensor([s_]).to(device=device)
        s = torch.squeeze(s, dim=0)
        s_ = torch.squeeze(s_, dim=0)
        transition = torch.cat((s, a, r, s_))
        index = self.memory_counter % Memory_capacity
        self.alpha_memory[index, :] = transition
        self.memory_counter += 1

    def choose_action(self, state):  # reusable
        if np.random.uniform() < EPSILON:  # greedy
            state = torch.Tensor(state)
            actions_value = self.Target_Net.forward(state)
            action = torch.argmax(actions_value)
        else:  # random
            action = torch.tensor([np.random.randint(0, 3)], device=device)
        return action
    def extract_para(self):
        return self.Target_Net.state_dict()

    def local_train(self, env, obs, episodes, global_parameters):
        for i in range(episodes):
            action = self.choose_action(obs)
            next_state, reward, done, collision = env.alpha_step(action)
            self.store_alpha_transition(obs, action, reward, next_state)
            obs = next_state
            if self.memory_counter >= Memory_capacity:
                self.local_learn(localEpoch=200, localBatchSize=32, global_parameters=global_parameters)
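
In case it helps: the line most likely responsible for the `None` grads is `loss_alpha = torch.tensor(loss_func(...), dtype=float, device=device, requires_grad=True)`. `torch.tensor()` copies the loss value into a brand-new leaf tensor with no autograd history, so `backward()` stops at that leaf and never reaches the parameters of `Eval_Net` (note also that `opti.step()` is never called, so even valid grads would not update the weights). Below is a minimal, self-contained sketch of the symptom and the usual fix; the names (`net`, `opt`, `target`) are illustrative, not from the code above:

import torch
import torch.nn as nn

net = nn.Linear(4, 2)
opt = torch.optim.Adam(net.parameters(), lr=1e-3)
target = torch.rand(8, 2)

# Symptom: torch.tensor() copies the loss value into a NEW leaf tensor with no
# autograd history, so backward() stops at that leaf and net's parameters
# never receive a gradient.
bad_loss = torch.tensor(nn.functional.mse_loss(net(torch.rand(8, 4)), target),
                        requires_grad=True)
bad_loss.backward()
print(net.weight.grad)  # None

# Fix: use the tensor returned by the loss function directly, and call step().
good_loss = nn.functional.mse_loss(net(torch.rand(8, 4)), target)
opt.zero_grad()
good_loss.backward()
opt.step()
print(net.weight.grad is not None)  # True

Applied to `local_learn`, the equivalent change would be to use `loss_func(q_alpha_eval, q_alpha_target)` as the loss directly and call `opti.step()` after `backward()`.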