Dear PyTorch team,
Greetings!
I am trying to run a model on multiple GPUs, but I keep hitting an "arguments are located on different GPUs" error. I have searched the PyTorch forum for this problem, but none of the suggested solutions seem to work.
In particular, I have been trying to use register_buffer,

self.register_buffer("positions", positions)

to get rid of the error

RuntimeError: arguments are located on different GPUs at /pytorch/aten/src/THC/generic/THCTensorMasked.cu:35

but the same error still occurs.
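My understanding of register_buffer is that a tensor registered as a buffer becomes part of the module's state, so it is moved (and replicated) together with the module. A toy sketch of what I mean (the module and buffer names here are just illustrative):

import torch
import torch.nn as nn

class BufferedModule(nn.Module):
    def __init__(self):
        super().__init__()
        # A registered buffer is part of the module state, so .cuda()/.to(device) moves it too.
        self.register_buffer("offsets", torch.arange(10))

    def forward(self, x):
        # The buffer and the input should end up on the same device once the module is moved.
        return x + self.offsets.type_as(x)

m = BufferedModule().cuda()    # moves parameters and buffers to the GPU
y = m(torch.randn(10).cuda())  # no device mismatch here

So I expected that registering positions (and mask) as buffers would keep them on the same device as the input, but it does not seem to help in my case.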
My torch version is 1.0.0, and I have also tried the approach from this post, but it didn't work either.

My code is below. It implements the sinusoidal positional embedding of the Transformer model; the line where the error occurs is marked with a comment near the end of the snippet.
import math

import torch
import torch.nn as nn


class SinusoidalPositionalEmbedding(nn.Module):
    """This module produces sinusoidal positional embeddings of any length.

    Padding symbols are ignored, but it is necessary to specify whether padding
    is added on the left side (left_pad=True) or right side (left_pad=False).
    """

    def __init__(self, embedding_dim, padding_idx=0, left_pad=0, init_size=128):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.padding_idx = padding_idx
        self.left_pad = left_pad
        self.weights = SinusoidalPositionalEmbedding.get_embedding(
            init_size,
            embedding_dim,
            padding_idx,
        )
        # Here are the buffers
        self.register_buffer('_float_tensor', torch.FloatTensor(1))
        positions = None
        self.register_buffer("positions", positions)
        mask = None
        self.register_buffer("mask", mask)
    @staticmethod
    def get_embedding(num_embeddings, embedding_dim, padding_idx=None):
        """Build sinusoidal embeddings.

        This matches the implementation in tensor2tensor, but differs slightly
        from the description in Section 3.5 of "Attention Is All You Need".
        """
        half_dim = embedding_dim // 2
        emb = math.log(10000) / (half_dim - 1)
        emb = torch.exp(torch.arange(half_dim, dtype=torch.float) * -emb)
        emb = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(1) * emb.unsqueeze(0)
        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1).view(num_embeddings, -1)
        if embedding_dim % 2 == 1:
            # zero pad
            emb = torch.cat([emb, torch.zeros(num_embeddings, 1)], dim=1)
        if padding_idx is not None:
            emb[padding_idx, :] = 0
        return emb
    def forward(self, input):
        """Input is expected to be of size [bsz x seqlen]."""
        bsz, seq_len = input.size()
        max_pos = self.padding_idx + 1 + seq_len
        if self.weights is None or max_pos > self.weights.size(0):
            # recompute/expand the embedding table if the input is longer than init_size
            self.weights = SinusoidalPositionalEmbedding.get_embedding(
                max_pos,
                self.embedding_dim,
                self.padding_idx,
            )
        self.weights = self.weights.type_as(self._float_tensor)
        max_pos = self.padding_idx + 1 + input.size(1)
        # NOTE: make_positions is a helper defined/imported elsewhere in this file;
        # only its function attribute range_buf is used here, as a cache of position indices.
        if not hasattr(make_positions, 'range_buf'):
            make_positions.range_buf = input.new()
        make_positions.range_buf = make_positions.range_buf.type_as(input)
        if make_positions.range_buf.numel() < max_pos:
            torch.arange(self.padding_idx + 1, max_pos, out=make_positions.range_buf)
        self.mask = input.ne(self.padding_idx)
        self.positions = make_positions.range_buf[:input.size(1)].expand_as(input)
        if self.left_pad:
            self.positions = self.positions - self.mask.size(1) + self.mask.long().sum(dim=1).unsqueeze(1)
        # !!! This is the line the error points to
        self.positions = input.clone().masked_scatter_(self.mask, self.positions[self.mask]).long()
        return self.weights.index_select(0, self.positions.view(-1)).view(bsz, seq_len, -1).detach()
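For reference, this is roughly how the module gets called on a single device (the sizes are made up for illustration); the device mismatch only shows up once multiple GPUs are involved:

emb = SinusoidalPositionalEmbedding(embedding_dim=512, padding_idx=0)
tokens = torch.randint(1, 100, (2, 10))  # [bsz x seqlen], no padding symbols in this example
out = emb(tokens)                        # -> [2, 10, 512]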
The error I get:
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/home/qianlim/dl_signal/transformer/modules/position_embedding.py", line 92, in forward
self.positions = input.clone().masked_scatter_(self.mask, self.positions[self.mask]).long()
RuntimeError: arguments are located on different GPUs at /pytorch/aten/src/THC/generic/THCTensorMasked.cu:35
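In case a self-contained snippet is useful, below is a stripped-down sketch of the kind of multi-GPU run where I see this. I am assuming nn.DataParallel-style replication here and wrapping just the embedding to keep it short; in my real code the embedding sits inside the full Transformer model.

import torch
import torch.nn as nn

# Stripped-down stand-in for my multi-GPU run: replicate the embedding over
# the available GPUs and feed it a batch of token indices.
emb = SinusoidalPositionalEmbedding(embedding_dim=512, padding_idx=0)
model = nn.DataParallel(emb.cuda())
tokens = torch.randint(1, 100, (4, 10)).cuda()  # [bsz x seqlen]
out = model(tokens)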
Thank you so much in advance!