iam new to NLP and currently iam doing this code below wehre iam trying to save the Last Hidden States of the DistilBert model , the data is huge so iam using GPU
but i don’t know why i face this problem ==> Input, output and indices must be on the current device
my code ==>
-
tokenized = data[‘txt’].apply((lambda x: tokenizer.encode(x, add_special_tokens=True,truncation = True, max_length = 512)))
-
max_len = 0
for i in tokenized.values:
if len(i) > max_len:
max_len = len(i)
padded = np.array([i + [0]*(max_len-len(i)) for i in tokenized.values])
-
np.array(padded).shape
-
attention_mask = np.where(padded != 0, 1, 0)
-
input_ids = torch.tensor(padded).to(device)
-
attention_mask = torch.tensor(attention_mask).to(device)
-
with torch.no_grad():
last_hidden_states = model(input_ids, attention_mask=attention_mask).to(device)
= ---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
in
1 with torch.no_grad():
----> 2 last_hidden_states = model(input_ids, attention_mask=attention_mask).to(device)
C:\Users\jod204.conda\envs\gpu_pyt\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
→ 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
C:\Users\jod204.conda\envs\gpu_pyt\lib\site-packages\transformers\models\distilbert\modeling_distilbert.py in forward(self, input_ids, attention_mask, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict)
478
479 if inputs_embeds is None:
→ 480 inputs_embeds = self.embeddings(input_ids) # (bs, seq_length, dim)
481 return self.transformer(
482 x=inputs_embeds,
C:\Users\jod204.conda\envs\gpu_pyt\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
→ 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
C:\Users\jod204.conda\envs\gpu_pyt\lib\site-packages\transformers\models\distilbert\modeling_distilbert.py in forward(self, input_ids)
105 position_ids = position_ids.unsqueeze(0).expand_as(input_ids) # (bs, max_seq_length)
106
→ 107 word_embeddings = self.word_embeddings(input_ids) # (bs, max_seq_length, dim)
108 position_embeddings = self.position_embeddings(position_ids) # (bs, max_seq_length, dim)
109
C:\Users\jod204.conda\envs\gpu_pyt\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
→ 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
C:\Users\jod204.conda\envs\gpu_pyt\lib\site-packages\torch\nn\modules\sparse.py in forward(self, input)
156 return F.embedding(
157 input, self.weight, self.padding_idx, self.max_norm,
→ 158 self.norm_type, self.scale_grad_by_freq, self.sparse)
159
160 def extra_repr(self) → str:
C:\Users\jod204.conda\envs\gpu_pyt\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
1914 # remove once script supports set_grad_enabled
1915 no_grad_embedding_renorm(weight, input, max_norm, norm_type)
→ 1916 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
1917
1918
RuntimeError: Input, output and indices must be on the current device
– in the last line , i tried to run it with and without [to(device)] but same error each time
how i can solve this problem