My param.grad returns None.

When the output of the network is passed through the compute_result function, the gradients of the model parameters (param.grad) are all None.

def compute_result(entity_in_doc, entity_in_mention, gold_indices, score):
    """Return, per mention, the best-scoring candidate index and the gold rank.

    Candidates are grouped first by document and then by mention. Documents
    are visited in ascending id order and, inside a document, mentions are
    visited in order of first appearance. The i-th resulting group is paired
    with ``gold_indices[i]``.

    Args:
        entity_in_doc: Per-candidate document id (iterable of integer scalars).
        entity_in_mention: Per-candidate mention id (iterable of integer
            scalars).
        gold_indices: For each mention group, the position of the gold
            candidate inside that group (in original candidate order).
        score: Per-candidate score (iterable of single-element values).

    Returns:
        A tuple ``(max_value_indices, rankings)``:

        * ``max_value_indices`` -- ``torch.long`` tensor holding, for every
          mention group, the position of its highest-scoring candidate.
        * ``rankings`` -- ``torch.float32`` tensor holding, for every mention
          group that has a gold index, the 0-based rank of the gold candidate
          (the number of candidates scoring strictly higher).

    Note:
        ``rankings`` is a brand-new leaf tensor built from Python integers.
        ``requires_grad=True`` is kept only for backward compatibility: the
        tensor has no autograd connection to ``score``, so a loss computed
        from it cannot propagate gradients to the model that produced
        ``score``. Ranks are integer positions and are not differentiable.
    """
    # doc id -> mention id -> scores, filled in a single pass. Plain floats
    # are stored so that the later comparisons never touch autograd.
    groups = defaultdict(lambda: defaultdict(list))
    for doc_id, mention_id, value in zip(entity_in_doc, entity_in_mention, score):
        groups[int(doc_id)][int(mention_id)].append(float(value))

    # Flatten to one score list per mention, keeping candidate order intact.
    entity_score_mention = [
        values
        for doc_id in sorted(groups)
        for values in groups[doc_id].values()
    ]

    # Index of the best candidate, measured in the ORIGINAL candidate order
    # (the lists are never sorted in place, so positions stay meaningful).
    max_value_indices = [
        values.index(max(values)) for values in entity_score_mention
    ]

    # Rank of the gold candidate = how many candidates beat it strictly.
    rankings = []
    for values, gold_index in zip(entity_score_mention, gold_indices):
        gold_value = values[int(gold_index)]
        rankings.append(sum(value > gold_value for value in values))

    max_value_indices = torch.tensor(max_value_indices, dtype=torch.long)
    rankings = torch.tensor(rankings, dtype=torch.float32, requires_grad=True)
    return max_value_indices, rankings
for i in range(epoch):
    # If the previous batch ended exactly on the last document, `step` now
    # equals the dataset size. Wrap around here; otherwise the batch built
    # below would be empty and the max() calls would raise ValueError.
    if step >= len(preprocessed_train_documents):
        step = 0

    entity_in_doc_batch = []  # per selected entity: document index relative to this batch
    entity_in_sentence_batch = []  # per selected entity: its entry from entity_in_sentence
    entity_in_mention_batch = []  # per selected entity: its entry from entity_in_mention
    entity_batch = []  # per selected entity: its entry from entities (presumably an embedding -- TODO confirm)
    gold_indices_batch = []  # gold indices of every document in this batch
    batch_x = []  # un-padded sentence representations, one entry per document
    is_reset = False

    # Build the training batch from documents [step, step + Batch_size).
    for j in range(step, step + Batch_size):
        if j >= len(preprocessed_train_documents):
            # Ran past the last document: train on the partial batch and
            # restart from the first document on the next iteration.
            is_reset = True
            break
        doc = preprocessed_train_documents[j]
        batch_x.append(generate_sentence_representations(doc))
        gold_indices_batch.extend(gold_indices[j])

    # Collect the entity information belonging to the documents of this batch.
    for k, doc_index in enumerate(entity_in_doc):
        if step <= doc_index < step + Batch_size and doc_index < len(preprocessed_train_documents):
            entity_in_doc_batch.append(doc_index - step)
            entity_in_sentence_batch.append(entity_in_sentence[k])
            entity_in_mention_batch.append(entity_in_mention[k])
            entity_batch.append(entities[k])

    if is_reset:
        step = 0
    else:
        step += Batch_size

    # Longest document (in sentences) and longest sentence in this batch.
    max_doc_len = max(len(doc) for doc in batch_x)
    max_sent_len = max(len(sent) for doc in batch_x for sent in doc)

    # Pad every sentence to max_sent_len with zeros, then pad every document
    # to max_doc_len with all-zero sentences.
    padded_batch_x = []  # batch, max_doc_len, max_sent_len
    for doc in batch_x:
        padded_doc = []
        for sent in doc:
            pad = torch.zeros(max_sent_len - len(sent), dtype=torch.long)
            padded_doc.append(torch.cat([sent, pad]))
        padded_doc.extend(
            torch.zeros(max_sent_len, dtype=torch.long)
            for _ in range(max_doc_len - len(doc))
        )
        padded_batch_x.append(torch.stack(padded_doc))

    x = torch.stack(padded_batch_x)
    entity_in_doc_batch = torch.tensor(entity_in_doc_batch, dtype=torch.long)
    entity_in_sentence_batch = torch.tensor(entity_in_sentence_batch, dtype=torch.long)
    entity_in_mention_batch = torch.tensor(entity_in_mention_batch, dtype=torch.long)
    entity_batch = torch.stack(entity_batch)

    # Train the model.
    optimizer.zero_grad()
    # NOTE(review): the forward signature quoted by the original author is
    # forward(self, x, entities, entity_in_doc, entity_in_sent, entity_in_mention, use_gpu=False)
    # but this call does not pass entity_in_mention -- confirm against the model.
    score = model(x, entity_batch, entity_in_doc_batch, entity_in_sentence_batch, use_gpu=False)

    # Rank of the gold candidate for every mention in the batch.
    max_indices, ranks = compute_result(entity_in_doc_batch, entity_in_mention_batch, gold_indices_batch, score)

    # NOTE(review): `ranks` and `target` are both freshly created leaf tensors,
    # so this loss has no autograd path back to `score` or the model; every
    # param.grad printed below stays None. To train the model, the loss must
    # be computed from `score` itself with differentiable tensor operations.
    target = torch.zeros(len(ranks), dtype=torch.float32, requires_grad=True)
    loss = rankloss(target, ranks, torch.tensor([1], dtype=torch.float32))

    loss.backward()
    optimizer.step()

    # Print the parameter gradients to check whether anything is wrong.
    for name, param in model.named_parameters():
        print(f"Parameter: {name}, grad: {param.grad}")
    print('epoch ', i, ' loss: ',loss.item())

Hi @Cartman,

When you re-wrap `max_value_indices` and `rankings` with `torch.tensor`, you destroy the history of operations behind those values and effectively restart gradient tracking from a brand-new leaf tensor. At the end of `compute_result` (the two `torch.tensor(...)` calls just before the `return`), you re-wrap your outputs, so the link back to `score` and the model is lost. Specifying `requires_grad=True` does not help, because autograd only records operations from that point onward, so the model parameters' `.grad` stays at its default value of None.

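You need to rewrite this function and avoid calling `torch.tensor` on intermediate values, as that will destroy any gradient information. Note also that a rank obtained by sorting a list and looking up a position is an integer and is not differentiable, so the loss has to be computed directly from the `score` tensor (for example, a cross-entropy or margin loss over each mention's candidate scores) for gradients to reach the model.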