I am using a BERT model based on: https://github.com/huggingface/pytorch-pretrained-BERT
class MyBertBasedModel(BertPreTrainedModel):
    """BERT model for classification.

    This module is composed of the BERT model with an LSTM and a linear
    layer on top of its sequence output.
    """

    def __init__(self, config, num_labels):
        super(MyBertBasedModel, self).__init__(config)
        self.num_labels = num_labels
        self.bidirectional = False
        self.bi_dim = 2 if self.bidirectional else 1
        self.bert = BertModel(config)
        self.lstm = torch.nn.LSTM(config.hidden_size, config.hidden_size, bidirectional=self.bidirectional)
        self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
        self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
        # initial LSTM hidden state, created once at init directly on 'cuda' (batch dimension hard-coded to 128)
        self.hidden = (torch.zeros(self.bi_dim, 128, config.hidden_size, device=torch.device('cuda')),
                       torch.zeros(self.bi_dim, 128, config.hidden_size, device=torch.device('cuda')))
        self.apply(self.init_bert_weights)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
        sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
        sequence_output = self.dropout(sequence_output)
        # lstm
        lstm_out, _ = self.lstm(sequence_output, self.hidden)
        logits = self.classifier(lstm_out)
        # no lstm
        # logits = self.classifier(sequence_output)
        if labels is not None:
            loss_fct = MyLoss()
            active_loss = attention_mask.view(-1) == 1
            active_logits = logits.view(-1, self.num_labels)[active_loss]
            active_labels = labels.view(-1, self.num_labels)[active_loss]
            loss = loss_fct(active_logits, active_labels)
            return loss
        else:
            return logits
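
For context, the model is created, wrapped for multi-GPU, and moved to the GPU roughly like this (a minimal sketch; the checkpoint name and variable names are assumptions, not my exact code):

    # sketch of the setup, assuming bert-base-uncased and these variable names
    model = MyBertBasedModel.from_pretrained("bert-base-uncased", num_labels=num_labels)
    model = torch.nn.DataParallel(model)  # parallelize across all visible GPUs
    model.to("cuda")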
I am using .to("cuda")
in order to move all four inputs to GPU:
loss = model(input_ids, segment_ids, input_mask, label_ids)
notice label_ids is of type torch.float while the others are torch.long (maybe this causes the problem?)
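
Concretely, the inputs are moved roughly like this (again a sketch; the tensor names are the ones from my training loop):

    device = torch.device("cuda")
    input_ids = input_ids.to(device)      # torch.long
    segment_ids = segment_ids.to(device)  # torch.long
    input_mask = input_mask.to(device)    # torch.long
    label_ids = label_ids.to(device)      # torch.float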
And I get this error:
RuntimeError: Input and hidden tensors are not at the same device, found input tensor at cuda:1 and hidden tensor at cuda:0
When I explicitly use .to("cuda:0") for all inputs, I get the same error.
When I don't use parallelization (torch.nn.DataParallel(model)), I instead get a CUDA out-of-memory error:
RuntimeError: CUDA out of memory. Tried to allocate 48.00 MiB (GPU 0; 10.92 GiB total capacity; 5.61 GiB already allocated; 35.50 MiB free; 122.32 MiB cached)
The full traceback of the device mismatch error:
Traceback (most recent call last):
File "/home/nlp/noalu/sync_to_pycharm/BERT-NER/main.py", line 655, in <module>
main()
File "/home/nlp/noalu/sync_to_pycharm/BERT-NER/main.py", line 548, in main
loss = model(input_ids, segment_ids, input_mask, label_ids)
File "/home/nlp/noalu/anaconda2/envs/py36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/home/nlp/noalu/anaconda2/envs/py36/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 152, in forward
outputs = self.parallel_apply(replicas, inputs, kwargs)
File "/home/nlp/noalu/anaconda2/envs/py36/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 162, in parallel_apply
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File "/home/nlp/noalu/anaconda2/envs/py36/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 83, in parallel_apply
raise output
File "/home/nlp/noalu/anaconda2/envs/py36/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 59, in _worker
output = module(*input, **kwargs)
File "/home/nlp/noalu/anaconda2/envs/py36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/home/nlp/noalu/sync_to_pycharm/BERT-NER/bert.py", line 54, in forward
lstm_out, _ = self.lstm(sequence_output, self.hidden)
File "/home/nlp/noalu/anaconda2/envs/py36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/home/nlp/noalu/anaconda2/envs/py36/lib/python3.6/site-packages/torch/nn/modules/rnn.py", line 559, in forward
return self.forward_tensor(input, hx)
File "/home/nlp/noalu/anaconda2/envs/py36/lib/python3.6/site-packages/torch/nn/modules/rnn.py", line 539, in forward_tensor
output, hidden = self.forward_impl(input, hx, batch_sizes, max_batch_size, sorted_indices)
File "/home/nlp/noalu/anaconda2/envs/py36/lib/python3.6/site-packages/torch/nn/modules/rnn.py", line 522, in forward_impl
self.dropout, self.training, self.bidirectional, self.batch_first)
RuntimeError: Input and hidden tensors are not at the same device, found input tensor at cuda:1 and hidden tensor at cuda:0
Process finished with exit code 1