After the network's output passes through the compute_result function below, every model parameter's gradient comes back as None.
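The most likely cause: compute_result reduces the scores to plain Python ints (via list sorting and .index()), then rebuilds `rankings` with torch.tensor(..., requires_grad=True). That call creates a brand-new leaf tensor with no grad_fn, so backward() stops there and never reaches the model's parameters. A minimal repro of the break, independent of the code below:

import torch

w = torch.randn(3, requires_grad=True)   # stands in for a model parameter
score = (w * 2).sum()                    # attached to the graph (score.grad_fn is set)

# Re-wrapping the value creates a fresh leaf; requires_grad=True does not
# re-attach it to w's graph.
detached = torch.tensor([score.item()], requires_grad=True)
detached.sum().backward()
print(w.grad)         # None -- the gradient never reaches w
print(detached.grad)  # tensor([1.]) -- backward stopped at the new leaf

The function in question: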
from collections import defaultdict
import torch

def compute_result(entity_in_doc, entity_in_mention, gold_indices, score):
    # entity_in_doc:     tensor [entity_num], document index of each candidate
    # entity_in_mention: tensor [entity_num], mention index of each candidate
    # gold_indices:      [mention_num], index of the gold candidate per mention
    # score:             tensor [entity_num], candidate scores from the model

    # Group the candidate scores (and mention ids) by document.
    entity_score_doc = []
    entity_mention = []
    for i in range(int(entity_in_doc.max()) + 1):
        entity_score_doc.append([score[j] for j in range(len(entity_in_doc)) if entity_in_doc[j] == i])
        entity_mention.append([entity_in_mention[j] for j in range(len(entity_in_doc)) if entity_in_doc[j] == i])

    # Within each document, split the scores further by mention.
    entity_score_mention = []
    for i, row in enumerate(entity_score_doc):
        mention_ids = [t.item() for t in entity_mention[i]]
        grouped = defaultdict(list)
        for j, num in enumerate(row):
            grouped[mention_ids[j]].append(num)
        # Append each mention's score list to entity_score_mention.
        for sublist in grouped.values():
            entity_score_mention.append(sublist)

    # For each mention, record the index of its highest-scoring candidate.
    # (Take the index on the unsorted list: an in-place sort would always
    # leave the max at the last position and would also corrupt the gold
    # lookup below.)
    sorted_result = []
    max_value_indices = []
    for sublist in entity_score_mention:
        max_value_indices.append(sublist.index(max(sublist)))
        sorted_result.append(sorted(sublist))

    # For each mention, record the rank of the gold candidate's score.
    rankings = []
    for sublist, gold_index in zip(entity_score_mention, gold_indices):
        sorted_sublist = sorted(sublist, reverse=True)      # descending scores
        gold_value = sublist[gold_index]                    # the gold candidate's score
        rankings.append(sorted_sublist.index(gold_value))   # 0-based rank, a plain int

    # Convert the results to tensors. NOTE: torch.tensor() builds brand-new
    # leaf tensors from plain Python ints; they are not connected to `score`,
    # and requires_grad=True does not re-attach them to the model's graph.
    max_value_indices = torch.tensor(max_value_indices, dtype=torch.long)
    rankings = torch.tensor(rankings, dtype=torch.float32, requires_grad=True)
    return max_value_indices, rankings
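Note that the per-mention score lists inside compute_result are still graph-attached (each element is a slice score[j]); the chain is cut only when the integer ranks are re-wrapped. One graph-preserving alternative is to build the loss directly from those score slices, e.g. a hinge margin between the gold candidate and the others. This is a minimal sketch, not the original method; the helper name, `scores_per_mention` (the entity_score_mention lists above), and `margin` are assumptions:

import torch
import torch.nn.functional as F

def margin_loss_per_mention(scores_per_mention, gold_indices, margin=1.0):
    # scores_per_mention: one list of 0-d score tensors per mention, each
    # still attached to the model's graph (they are slices of `score`).
    losses = []
    for sublist, gold in zip(scores_per_mention, gold_indices):
        scores = torch.stack(sublist)   # stack keeps the graph intact
        gold_score = scores[gold]
        # Penalize every candidate whose score comes within `margin` of the
        # gold score; subtract `margin` once to cancel the gold-vs-gold term.
        losses.append(F.relu(margin - (gold_score - scores)).sum() - margin)
    return torch.stack(losses).mean()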
for i in range(epoch):
    x = []                         # documents
    entity_in_doc_batch = []       # [entity_num] for this batch
    entity_in_sentence_batch = []  # [entity_num] for this batch
    entity_in_mention_batch = []   # [entity_num] for this batch
    entity_batch = []              # [entity_num, embedding_dim] for this batch
    gold_indices_batch = []        # [mention_num] for this batch
    batch_x = []                   # [batch, sentence_num, sentence_len], not yet padded
    is_reset = False

    # Build the training batch.
    for j in range(step, step + Batch_size):
        if j >= len(preprocessed_train_documents):
            is_reset = True
            break
        doc = preprocessed_train_documents[j]
        sentence_representations = generate_sentence_representations(doc)
        batch_x.append(sentence_representations)
        gold_indices_batch.extend(gold_indices[j])
    # Collect the entity representations belonging to this batch's documents.
    for k in range(len(entity_in_doc)):
        if step <= entity_in_doc[k] < step + Batch_size and entity_in_doc[k] < len(preprocessed_train_documents):
            entity_in_doc_batch.append(entity_in_doc[k] - step)
            entity_in_sentence_batch.append(entity_in_sentence[k])
            entity_in_mention_batch.append(entity_in_mention[k])
            entity_batch.append(entities[k])
    if is_reset:
        step = 0
    else:
        step += Batch_size

    # Longest sentence and longest document in the current batch.
    max_doc_len = max(len(doc) for doc in batch_x)
    max_sent_len = max(len(sent) for doc in batch_x for sent in doc)

    # Pad every sentence to max_sent_len, then every document to max_doc_len.
    padded_batch_x = []            # [batch, max_doc_len, max_sent_len]
    for doc in batch_x:
        padded_doc = []
        for sent in doc:
            sent = torch.cat([sent, torch.zeros(max_sent_len - len(sent), dtype=torch.long)])
            padded_doc.append(sent)
        # Fill the document up to max_doc_len with empty sentences.
        for _ in range(max_doc_len - len(padded_doc)):
            padded_doc.append(torch.zeros(max_sent_len, dtype=torch.long))
        padded_doc = torch.stack(padded_doc)
        padded_batch_x.append(padded_doc)
    x = torch.stack(padded_batch_x)
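    # Note (editor): torch.nn.utils.rnn.pad_sequence can replace the inner
    # sentence-padding loop -- pad_sequence(doc, batch_first=True) pads a list
    # of 1-D tensors to the longest one in that list. It handles only one
    # nesting level, though, so padding every document to the batch-wide
    # max_sent_len and max_doc_len would still need the manual loops above.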
    entity_in_doc_batch = torch.tensor(entity_in_doc_batch, dtype=torch.long)
    entity_in_sentence_batch = torch.tensor(entity_in_sentence_batch, dtype=torch.long)
    entity_in_mention_batch = torch.tensor(entity_in_mention_batch, dtype=torch.long)
    entity_batch = torch.stack(entity_batch)

    # Train on the batch.
    # forward(self, x, entities, entity_in_doc, entity_in_sent, entity_in_mention, use_gpu=False)
    optimizer.zero_grad()
    score = model(x, entity_batch, entity_in_doc_batch, entity_in_sentence_batch, use_gpu=False)

    # Rank of the gold candidate inside each mention's candidate list.
    max_indices, ranks = compute_result(entity_in_doc_batch, entity_in_mention_batch, gold_indices_batch, score)

    # MarginRankingLoss expects (input1, input2, target). Both inputs here are
    # freshly built leaf tensors, so the loss has no path back to the model.
    target = torch.zeros(len(ranks), dtype=torch.float32, requires_grad=True)
    loss = rankloss(target, ranks, torch.tensor([1], dtype=torch.float32))
    loss.backward()
    optimizer.step()

    # Print the parameter gradients to check for problems: they all come out as None.
    for name, param in model.named_parameters():
        print(f"Parameter: {name}, grad: {param.grad}")
    print('epoch ', i, ' loss: ', loss.item())
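To confirm where the graph is cut, it helps to inspect grad_fn along the path from score to loss. A quick check, assuming the loop variables above are still in scope:

print(score.grad_fn)   # set (e.g. <AddBackward0>) -- score is attached to the model
print(ranks.grad_fn)   # None -- ranks is a freshly built leaf; the chain ends here
print(ranks.is_leaf)   # True
print(loss.grad_fn)    # set, but backward() through it only reaches ranks and target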