RuntimeError: arguments are located on different GPUs
I ran into this problem in the decoder part of my seq2seq project. I can't find where the problem is; can you give me a hint?
decoder_input = inputs[:, :-1]
decoder_output, decoder_hidden, attn = self.forward_step(decoder_input, decoder_hidden, encoder_outputs,
                                                         function=function)
indices = torch.arange(inputs.size(0))
if torch.cuda.is_available():
    indices = indices.to(device)
    probabilities = probabilities.to(device)
for t in range(1, probabilities.size(1)):
    inputs = inputs.to(device)
    probabilities[indices, t] = decoder_output[indices, t-1, inputs[indices, t].view(-1)].exp().view(-1)
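
To narrow it down, I have been printing the device of every tensor used on the failing line right before it runs (a throwaway debug helper I added just for this, not part of the model):

def report_devices(**tensors):
    # quick check: print which device every named tensor lives on
    for name, t in tensors.items():
        print(name, t.device)

report_devices(decoder_output=decoder_output,
               probabilities=probabilities,
               indices=indices,
               inputs=inputs)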
Here is the whole relevant code. Thank you so much!
def forward(self, inputs=None, encoder_hidden=None, encoder_outputs=None,
            function=F.log_softmax, teacher_forcing_ratio=0, sample=False):
    ret_dict = dict()
    if self.use_attention:
        ret_dict[DecoderRNN.KEY_ATTN_SCORE] = list()

    inputs, batch_size, max_length = self._validate_args(inputs, encoder_hidden, encoder_outputs,
                                                         function, teacher_forcing_ratio)
    decoder_hidden = self._init_state(encoder_hidden)

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    decoder_outputs = []
    sequence_symbols = []
    lengths = np.array([max_length] * batch_size)

    def decode(step, step_output, step_attn, sample=False):
        decoder_outputs.append(step_output)
        if self.use_attention:
            ret_dict[DecoderRNN.KEY_ATTN_SCORE].append(step_attn)
        if sample:
            symbols = torch.multinomial(torch.exp(decoder_outputs[-1]), 1)
            probs = torch.exp(decoder_outputs[-1])[np.arange(symbols.size(0)), symbols.squeeze(1)]
        else:
            symbols = decoder_outputs[-1].topk(1)[1]
        sequence_symbols.append(symbols)

        eos_batches = symbols.data.eq(self.eos_id)
        if eos_batches.dim() > 0:
            eos_batches = eos_batches.cpu().view(-1).numpy()
            update_idx = ((lengths > step) & eos_batches) != 0
            lengths[update_idx] = len(sequence_symbols)
        if sample:
            return symbols, probs
        return symbols

    # Manual unrolling is used to support random teacher forcing.
    # If teacher_forcing_ratio is True or False instead of a probability, the unrolling can be done in-graph.
    if use_teacher_forcing:
        probabilities = torch.ones(inputs.size(0), max_length + 1)
        samples_sent = torch.ones(inputs.size(0), max_length + 1) * self.sos_id
        hiddens = torch.zeros(max_length + 1, 2, batch_size, self.hidden_size)
        hiddens[0] = decoder_hidden

        decoder_input = inputs[:, :-1]
        decoder_output, decoder_hidden, attn = self.forward_step(decoder_input, decoder_hidden, encoder_outputs,
                                                                 function=function)

        indices = torch.arange(inputs.size(0))
        if torch.cuda.is_available():
            indices = indices.to(device)
            probabilities = probabilities.to(device)
        for t in range(1, probabilities.size(1)):
            probabilities[indices, t] = decoder_output[indices, t-1, inputs[indices, t].view(-1)].exp().view(-1)
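
In case it matters, my current understanding of this class of error is that it comes from mixing tensors that sit on different CUDA devices in a single operation (for example when the model is replicated by DataParallel). This toy sketch is how I think it can be reproduced in isolation; it assumes at least two visible GPUs, it is not my actual model, and the exact message may differ between PyTorch versions:

import torch

if torch.cuda.device_count() >= 2:
    a = torch.ones(3, device='cuda:0')
    idx = torch.arange(3, device='cuda:1')
    # indexing a cuda:0 tensor with a cuda:1 index tensor raises a
    # device-mismatch RuntimeError similar to the one above
    b = a[idx]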