This is my model; I am only showing the `__init__()` and `forward()` functions.
class BERTplusAoA(nn.Module):
def __init__(self, config, options):
    """BERT encoder with an Attention-over-Attention span-prediction head.

    Args:
        config: transformers ``BertConfig`` for the pretrained checkpoint.
        options: namespace providing ``model_name_or_path`` and ``cache_dir``.
    """
    super(BERTplusAoA, self).__init__()
    self.bert = BertModel.from_pretrained(
        options.model_name_or_path,
        from_tf=bool(".ckpt" in options.model_name_or_path),
        config=config,
        cache_dir=options.cache_dir if options.cache_dir else None,
    )
    # Span start/end scoring over the 1024-dim hidden states
    # (1024 matches a BERT-large config — confirm against `config`).
    self.l0 = nn.Linear(1024, 2)
    self.lq = nn.Linear(1024, 256)  # question projection, later split into begin/end halves
    self.lc = nn.Linear(1024, 256)  # context projection, later split into begin/end halves
    self.soft = nn.Softmax(dim=-1)
    # BUG FIX: the original code was
    #     self.wb = torch.sigmoid(nn.Parameter(torch.zeros(1)))
    # which evaluates sigmoid ONCE at construction time and stores the
    # resulting plain tensor. A plain tensor attribute is not registered
    # as a parameter, so wb/we were (a) never trained, (b) absent from
    # state_dict, and (c) NOT moved by model.to(device) — a direct source
    # of "Expected ... cuda but got ... cpu" errors when they are used.
    # Register the raw parameters here and apply torch.sigmoid(self.wb)
    # / torch.sigmoid(self.we) at the point of use in forward().
    self.wb = nn.Parameter(torch.zeros(1))
    self.we = nn.Parameter(torch.zeros(1))
# NOTE(review): forward() is truncated in the post ("..") — comments only.
def forward(self, input_ids, attention_mask, token_type_ids,start_positions,end_positions):
# Per-token hidden states from BERT. NOTE(review): tuple unpacking here
# assumes an older transformers version; newer versions return a
# ModelOutput object — confirm against the installed version.
sequence_out, _ = self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
# (batch_size,num_tokens,1024)
logits = self.l0(sequence_out)
# (batch_size,num_tokens,2)
start_logits,end_logits=logits.split(1,dim=-1)
# (batch_size,num_tokens,1) # (batch_size,num_tokens,1)
start_logits=start_logits.squeeze(-1)
end_logits=end_logits.squeeze(-1)
# (batch_size,num_tokens) # (batch_size,num_tokens)
lb = F.softmax(start_logits,dim=-1)
le = F.softmax(end_logits,dim=-1)
# Split hidden states into question / context parts using token_type_ids.
hq,hc=self.splitting_(sequence_out,token_type_ids)
#return hq,hc,sequence_out
# hq, hc are list of tensors.
# NOTE(review): the traceback points at `Hq = self.lq(hq)` below with a
# CPU input feeding CUDA weights. _batchify is not shown, but it most
# likely allocates fresh (padded) tensors with torch.zeros/torch.stack on
# the default device (CPU). Fix inside _batchify, or move its outputs:
#     hq = hq.to(sequence_out.device); hc = hc.to(sequence_out.device)
hq,hq_len=self._batchify(hq,include_lengths=True)
hc,hc_len=self._batchify(hc,include_lengths=True)
# hq, hc have are tensors of dim: # (batch_size, max_length q or c, 1024)
Hq = self.lq(hq)
Hq_b,Hq_e = Hq.split(128,dim=-1)
Hc = self.lc(hc)
Hc_b,Hc_e = Hc.split(128,dim=-1)
... (rest of forward() omitted)

When I run training, I get the following error:
Epoch: 0% 0/20 [00:00<?, ?it/s]
Iteration: 0% 0/6464 [00:00<?, ?it/s]Traceback (most recent call last):
File "run_techqa.py", line 623, in <module>
main()
File "run_techqa.py", line 617, in main
model = train(args, train_dataset, model, optimizer, tokenizer, model_evaluator)
File "run_techqa.py", line 223, in train
outputs = model(**inputs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/apex/amp/_initialize.py", line 197, in new_fwd
**applier(kwargs, input_caster))
File "/content/MyDrive/IBM/TechQA-Base/techqa-master/model_techqa.py", line 109, in forward
Hq = self.lq(hq)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py", line 87, in forward
return F.linear(input, self.weight, self.bias)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py", line 1372, in linear
output = input.matmul(weight.t())
RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_mm