here is my model.py code:
from typing import List
import torch
from torch import nn
import torch.nn.functional as F
import torchvision
from transformers import PreTrainedTokenizer, PreTrainedModel, AutoTokenizer, AutoModel, AutoConfig
from dataset import DataPoint, Data
import constants as constants
from torchcrf import CRF
from torch_geometric.nn import GCNConv
# constants for model
CLS_POS = 0
SUBTOKEN_PREFIX = '##'
IMAGE_SIZE = 224
VISUAL_LENGTH = (IMAGE_SIZE // 32) ** 2
def use_cache(module: nn.Module, data_points: List[DataPoint]):
for parameter in module.parameters():
if parameter.requires_grad:
return False
for data_point in data_points:
if data_point.feat is None:
return False
return True
def resnet_encode(model, x):
x = model.conv1(x)
x = model.bn1(x)
x = model.relu(x)
x = model.maxpool(x)
x = model.layer1(x)
x = model.layer2(x)
x = model.layer3(x)
x = model.layer4(x)
x = x.view(x.size()[0], x.size()[1], -1)
x = x.transpose(1, 2)
return x
class MultiModelModel(nn.Module):
def __init__(
self,
device: torch.device,
tokenizer: PreTrainedTokenizer,
encoder_t: PreTrainedModel,
hid_dim_t: int,
encoder_v: nn.Module = None,
hid_dim_v: int = None,
):
super().__init__()
self.device = device
self.tokenizer = tokenizer
self.encoder_t = encoder_t
self.hid_dim_t = hid_dim_t
self.encoder_v = encoder_v
self.hid_dim_v = hid_dim_v
self.token_embedding = None
self.proj = nn.Linear(hid_dim_v, hid_dim_t)
self.aux_head = nn.Linear(hid_dim_t, 2)
hid_dim_rnn = 256
num_layers = 2
num_directions = 2
self.gcn_layers = nn.ModuleList([
GCNConv(self.hid_dim_t, self.hid_dim_t),
GCNConv(self.hid_dim_t, self.hid_dim_t),
GCNConv(self.hid_dim_t, self.hid_dim_t),
GCNConv(self.hid_dim_t, self.hid_dim_t)
])
self.rnn = nn.LSTM(self.hid_dim_t, hid_dim_rnn, num_layers, batch_first=True, bidirectional=True)
self.head = nn.Linear(hid_dim_rnn * num_directions, constants.LABEL_SET_SIZE)
self.crf = CRF(constants.LABEL_SET_SIZE, batch_first=True)
self.to(device)
@classmethod
def from_pretrained(cls, cuda, t_encoder, v_encoder):
device = torch.device(f'cuda:{cuda}')
models_path = '../resources/models'
encoder_t_path = f'{models_path}/transformers/{t_encoder}'
tokenizer = AutoTokenizer.from_pretrained(encoder_t_path)
encoder_t = AutoModel.from_pretrained(encoder_t_path)
config = AutoConfig.from_pretrained(encoder_t_path)
hid_dim_t = config.hidden_size
encoder_v = getattr(torchvision.models, v_encoder)()
encoder_v.load_state_dict(torch.load(f'{models_path}/cnn/{v_encoder}.pth'))
hid_dim_v = encoder_v.fc.in_features
return cls(
device=device,
tokenizer=tokenizer,
encoder_t=encoder_t,
hid_dim_t=hid_dim_t,
encoder_v=encoder_v,
hid_dim_v=hid_dim_v,
)
def _bert_forward_with_image(self, inputs, datas, gate_signal=None):
images = [data.image for data,_,_ in datas]
textual_embeds = self.encoder_t.embeddings.word_embeddings(inputs.input_ids)
visual_embeds = torch.stack([image.data for image in images]).to(self.device)
if not use_cache(self.encoder_v, images):
visual_embeds = resnet_encode(self.encoder_v, visual_embeds)
visual_embeds = self.proj(visual_embeds)
if gate_signal is not None:
visual_embeds *= gate_signal
inputs_embeds = torch.concat((textual_embeds, visual_embeds), dim=1)
batch_size = visual_embeds.size()[0]
visual_length = visual_embeds.size()[1]
attention_mask = inputs.attention_mask
visual_mask = torch.ones((batch_size, visual_length), dtype=attention_mask.dtype, device=self.device)
attention_mask = torch.cat((attention_mask, visual_mask), dim=1)
token_type_ids = inputs.token_type_ids
visual_type_ids = torch.ones((batch_size, visual_length), dtype=token_type_ids.dtype, device=self.device)
token_type_ids = torch.cat((token_type_ids, visual_type_ids), dim=1)
return self.encoder_t(
inputs_embeds=inputs_embeds,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
return_dict=True
)
def ner_encode(self, datas: List[Data], gate_signal=None):
sentence_batch = [data.sentence for data,_,_ in datas]
tokens_batch = [[token.text for token in sentence] for sentence in sentence_batch]
inputs = self.tokenizer(tokens_batch, is_split_into_words=True, padding=True, return_tensors='pt',
return_special_tokens_mask=True, return_offsets_mapping=True).to(self.device)
outputs = self._bert_forward_with_image(inputs, datas, gate_signal)
feat_batch = outputs.last_hidden_state[:, :-VISUAL_LENGTH]
ids_batch = inputs.input_ids
offset_batch = inputs.offset_mapping
mask_batch = inputs.special_tokens_mask.bool().bitwise_not()
for sentence, ids, offset, mask, feat in zip(sentence_batch, ids_batch, offset_batch, mask_batch, feat_batch):
ids = ids[mask]
offset = offset[mask]
feat = feat[mask]
subtokens = self.tokenizer.convert_ids_to_tokens(ids)
length = len(subtokens)
token_list = []
feat_list = []
i = 0
while i < length:
j = i + 1
while j < length and (offset[j][0] != 0 or subtokens[j].startswith(SUBTOKEN_PREFIX)):
j += 1
token_list.append(''.join(subtokens[i:j]))
feat_list.append(torch.mean(feat[i:j], dim=0))
i = j
assert len(sentence) == len(token_list)
for token, token_feat in zip(sentence, feat_list):
token.feat = token_feat
def ner_forward(self, datas: List[Data]):
tokens_batch = [[token.text for token in data.sentence] for data, _, _ in datas]
inputs = self.tokenizer(tokens_batch, is_split_into_words=True, padding=True, return_tensors='pt')
inputs = inputs.to(self.device)
outputs = self._bert_forward_with_image(inputs, datas)
feats = outputs.last_hidden_state[:, CLS_POS]
logits = self.aux_head(feats)
gate_signal = F.softmax(logits, dim=1)[:, 1].view(len(datas), 1, 1)
_, edges, labels = datas[0]
edge_index = torch.cat(edges, dim=1)
self.ner_encode(datas, gate_signal)
sentences = [data.sentence for data,_,_ in datas]
batch_size = len(sentences)
lengths = [len(sentence) for sentence in sentences]
max_length = max(lengths)
feat_list = []
zero_tensor = torch.zeros(max_length * self.hid_dim_t, device=self.device)
for sentence in sentences:
feat_list += [token.feat for token in sentence]
num_padding = max_length - len(sentence)
if num_padding > 0:
padding = zero_tensor[:self.hid_dim_t * num_padding]
feat_list.append(padding)
feats = torch.cat(feat_list).view(batch_size, max_length, self.hid_dim_t)
print(feats.shape)
for gcn_layer in self.gcn_layers:
feats = gcn_layer(feats, edge_index)
feats = nn.utils.rnn.pack_padded_sequence(feats, lengths, batch_first=True, enforce_sorted=False)
feats, _ = self.rnn(feats)
feats, _ = nn.utils.rnn.pad_packed_sequence(feats, batch_first=True)
logits_batch = self.head(feats)
labels_batch = torch.zeros(batch_size, max_length, dtype=torch.long, device=self.device)
for i, sentence in enumerate(sentences):
labels = torch.tensor([token.label for token in sentence], dtype=torch.long, device=self.device)
labels_batch[i, :lengths[i]] = labels
mask = torch.zeros(batch_size, max_length, dtype=torch.bool, device=self.device)
for i in range(batch_size):
mask[i, :lengths[i]] = 1
loss = -self.crf(logits_batch, labels_batch, mask)
pred_ids = self.crf.decode(logits_batch, mask)
pred = [[constants.ID_TO_LABEL[i] for i in ids] for ids in pred_ids]
return loss, pred
dataset.py
import torch
from torch.utils.data import Dataset
from typing import List, Optional
from PIL import Image
from torchvision import transforms
from tqdm import tqdm
import spacy
class DataPoint:
def __init__(self):
self.feat: Optional[torch.Tensor] = None
self.label: Optional[int] = None
class Token(DataPoint):
def __init__(self, text, label):
super().__init__()
self.text: str = text
self.label = label
class Sentence(DataPoint):
def __init__(self, tokens: List[Token] = None, text: str = None):
super().__init__()
self.tokens: List[Token] = tokens
self.text = text
def __len__(self):
return len(self.tokens)
def __getitem__(self, index: int):
return self.tokens[index]
def __iter__(self):
return iter(self.tokens)
def __str__(self):
return self.text if self.text else ' '.join([token.text for token in self.tokens])
class ImageData(DataPoint):
def __init__(self, file_name: str):
super().__init__()
#print(file_name)
self.file_name: str = file_name
self.data: ImageData = None
class Data(DataPoint):
def __init__(self, sentence, image, label=-1):
super().__init__()
self.sentence: Sentence = sentence
self.image: ImageData = image
self.label = label
class SpacyParser:
def __init__(self, data: List[Data], transform):
self.datas = data
self._invalid_words = [' ']
self.parser = spacy.load("en_core_web_sm")
def execute(self):
edges_b = []
labels_b = []
for data in tqdm(self.datas):
parsed = self.parser(str(data.sentence))
edges = []
i = 0
items_dict = dict()
for item in parsed:
if item.orth_ in self._invalid_words:
continue
items_dict[item.idx] = i
i += 1
for item in parsed:
if item.orth_ in self._invalid_words:
continue
index = items_dict[item.idx]
for child_index in [items_dict[l.idx] for l in item.children
if not l.orth_ in self._invalid_words]:
edges.append((index, child_index))
edges = torch.tensor(edges, dtype=torch.long)
edges = edges.t().contiguous().long()
labels = [token.label for token in data.sentence]
edges_b.append(edges)
labels_b.append(torch.tensor(labels, dtype=torch.long))
return edges_b, labels_b
class CustomDataset(Dataset):
def __init__(self, datas: List[Data], path_to_images: str, load_image: bool = True):
self.datas: List[Data] = datas
self.path_to_images = path_to_images
self.load_image = load_image
self.transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
self.edges, self.labels = SpacyParser(self.datas, self.transform).execute()
def __len__(self):
return len(self.datas)
def __getitem__(self, index: int):
data = self.datas[index]
if self.load_image:
image = data.image
if image.data is not None or image.feat is not None:
return data
# print(image.file_name)
path_to_image = self.path_to_images + "/" + image.file_name
image.data = Image.open(path_to_image).convert('RGB')
image.data = self.transform(image.data)
return data, self.edges, self.labels
class Corpus:
def __init__(self, train=None, dev=None, test=None):
self.train: CustomDataset = train
self.dev: CustomDataset = dev
self.test: CustomDataset = test
when running it with train.py:
import os
import pickle
import torch
from torch.utils.data import DataLoader
import loader
from model import MultiModelModel
from utils import seed_worker, seed_everything, train, evaluate
if __name__ == '__main__':
num_workers = 8
encoder_t = 'bert-base-uncased'
encoder_v = 'resnet152'
dataset = 'twitter2015'
lr = 1e-5
num_epochs = 1
optim = 'Adam'
bs = 16
seed_everything(0)
generator = torch.Generator()
generator.manual_seed(0)
if num_workers > 0:
torch.multiprocessing.set_sharing_strategy('file_system')
os.environ['TOKENIZERS_PARALLELISM'] = 'true'
ner_corpus = loader.load_ner_corpus(f'resources/datasets/{dataset}', load_image=(encoder_v != ''))
ner_train_loader = DataLoader(ner_corpus.train, batch_size=bs, collate_fn=list, num_workers=num_workers,
shuffle=True, worker_init_fn=seed_worker, generator=generator)
ner_dev_loader = DataLoader(ner_corpus.dev, batch_size=bs, collate_fn=list, num_workers=num_workers)
ner_test_loader = DataLoader(ner_corpus.test, batch_size=bs, collate_fn=list, num_workers=num_workers)
model = MultiModelModel.from_pretrained(0, encoder_t, encoder_v) #CUDA编号,Transformer Encoder, Vision Encoder
params = [
{'params': model.encoder_t.parameters(), 'lr': lr},
{'params': model.head.parameters(), 'lr': lr * 100},
{'params': model.encoder_v.parameters(), 'lr': lr},
{'params': model.proj.parameters(), 'lr': lr * 100},
{'params': model.rnn.parameters(), 'lr': lr * 100},
{'params': model.crf.parameters(), 'lr': lr * 100},
{'params': model.aux_head.parameters(), 'lr': lr * 100}
]
optimizer = getattr(torch.optim, optim)(params)
dev_f1s, test_f1s = [], []
ner_losses, itr_losses = [], []
best_dev_f1, best_test_report = 0, None
#训练
for epoch in range(1, num_epochs + 1):
ner_loss = train(ner_train_loader, model, optimizer, task='ner')
ner_losses.append(ner_loss)
dev_f1, dev_report = evaluate(model, ner_dev_loader)
dev_f1s.append(dev_f1)
test_f1, test_report = evaluate(model, ner_test_loader)
test_f1s.append(test_f1)
if dev_f1 > best_dev_f1:
best_dev_f1 = dev_f1
best_test_report = test_report
print()
print(best_test_report)
file_name = f'trained/{encoder_t}-BiLSTM-{encoder_v}.pkl'
pickle.dump(model, open(file_name, 'wb'))
it gives me the error:
Traceback (most recent call last):
File "D:\projects\GNNNER\BiLSTM-Resnet152-Bert NER\train.py", line 59, in <module>
ner_loss = train(ner_train_loader, model, optimizer, task='ner')
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\projects\GNNNER\BiLSTM-Resnet152-Bert NER\utils.py", line 32, in train
loss, _ = getattr(model, f'{task}_forward')(batch)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\projects\GNNNER\BiLSTM-Resnet152-Bert NER\model.py", line 203, in ner_forward
feats = gcn_layer(feats, edge_index)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\projects\GNNNER\venv\Lib\site-packages\torch\nn\modules\module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\projects\GNNNER\venv\Lib\site-packages\torch\nn\modules\module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\projects\GNNNER\venv\Lib\site-packages\torch_geometric\nn\conv\gcn_conv.py", line 241, in forward
edge_index, edge_weight = gcn_norm( # yapf: disable
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\projects\GNNNER\venv\Lib\site-packages\torch_geometric\nn\conv\gcn_conv.py", line 108, in gcn_norm
deg = scatter(edge_weight, idx, dim=0, dim_size=num_nodes, reduce='sum')
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\projects\GNNNER\venv\Lib\site-packages\torch_geometric\utils\_scatter.py", line 75, in scatter
return src.new_zeros(size).scatter_add_(dim, index, src)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: index 28 is out of bounds for dimension 0 with size 28