Hi,

My code works perfectly fine on the CPU, but when I run it on the GPU, I get a “device-side assert triggered” error on this line:

```
packed_inp = pack_padded_sequence(x, seq_lens[i].cpu().numpy(), batch_first=True)
```

Can I not convert a tensor to NumPy inside the `forward` method? If not, what is the alternative?

I can’t call `pack_padded_sequence` outside the `forward` method, because my model contains an embedding layer and the input to `pack_padded_sequence` is the embeddings produced by that layer.
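
For reference, this is a minimal standalone sketch of the pattern each branch of my model follows (`emb`, `lstm`, and `tokens` here are hypothetical, with made-up sizes and random data):

```
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence

emb = nn.Embedding(100, 8)                # toy vocabulary and dimension
lstm = nn.LSTM(8, 4, batch_first=True)

tokens = torch.randint(1, 100, (3, 5))    # batch of 3 padded sequences
seq_lens = torch.tensor([5, 3, 2])        # lengths, sorted descending

x = emb(tokens)                           # embeddings feed the pack call
packed = pack_padded_sequence(x, seq_lens.cpu().numpy(), batch_first=True)
op, (ht, ct) = lstm(packed)
```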

The full code is given below.

My network:

```
import numpy as np
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence

class SiameseNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding_dim = 512  # must match the LSTM input size
        self.name_embedding = nn.Embedding(len(embeddings), self.embedding_dim)
        self.name_embedding.load_state_dict({'weight': torch.from_numpy(np.array(emb_vals))})
        self.name_embedding.weight.requires_grad = False  # frozen pretrained embeddings
        self.lstm = nn.LSTM(512, 250, 1, bidirectional=True, batch_first=True)
        self.cosine_sim_layer = nn.CosineSimilarity(dim=1)

    def forward(self, inputs, seq_lens, rev_indices):
        results = []
        inputs = inputs.permute(1, 0, 2)
        seq_lens, rev_indices = seq_lens.T, rev_indices.T
        for i in range(2):
            x = self.name_embedding(inputs[i])
            self.lstm.flatten_parameters()
            packed_inp = pack_padded_sequence(x, seq_lens[i].cpu().numpy(), batch_first=True)
            op, (ht, ct) = self.lstm(packed_inp)
            x = ht.permute(1, 0, 2)
            x = x[rev_indices[i], :, :]  # restore the pre-sort order
            results.append(x.reshape(-1, 2 * 250))
        x = self.cosine_sim_layer(results[0], results[1])
        return x
```
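
As a sanity check, this is how the network can be called with dummy data (`embeddings` and `emb_vals` here are hypothetical stand-ins for my real pretrained vectors, and the sizes are made up):

```
import numpy as np
import torch

embeddings = list(range(1000))           # hypothetical vocabulary
emb_vals = np.random.rand(1000, 512)     # hypothetical 512-dim pretrained vectors

model = SiameseNetwork()
B, T = 4, 12
inp = torch.randint(0, 1000, (B, 2, T))
# Lengths sorted descending per column, since forward transposes them.
lens, _ = torch.sort(torch.randint(1, T + 1, (B, 2)), dim=0, descending=True)
rev = torch.stack([torch.randperm(B), torch.randperm(B)], dim=1)
out = model(inp, lens, rev)              # expected shape: (B,)
```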

My training loop:

```
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = nn.DataParallel(SiameseNetwork()).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)

for epoch in range(num_epochs):
    inputs_pos, targets_pos = generate_input(train_data_t, 1)
    inputs_neg, targets_neg = generate_input(train_data_f, 0)
    for batch_idx in range(num_batches):
        batch_start = batch_idx * batch_size
        batch_end = (batch_idx + 1) * batch_size
        batch_start_f = batch_idx * batch_size_f
        batch_end_f = (batch_idx + 1) * batch_size_f
        inputs = np.concatenate((inputs_pos[batch_start:batch_end],
                                 inputs_neg[batch_start_f:batch_end_f]))
        targets = np.concatenate((targets_pos[batch_start:batch_end],
                                  targets_neg[batch_start_f:batch_end_f]))

        # Sort each side by descending sequence length so that
        # pack_padded_sequence can be used inside the forward method.
        inp = inputs.transpose(1, 0, 2)
        nonzero_elems = np.count_nonzero(inp, axis=-1)
        indices = np.flip(np.argsort(nonzero_elems, axis=-1), axis=-1)
        seq_lens = np.flip(np.sort(nonzero_elems, axis=-1), axis=-1)
        inp_elems = np.stack((inp[0][indices[0]], inp[1][indices[1]]), axis=0).transpose(1, 0, 2)
        inp_elems = torch.LongTensor(inp_elems).to(device)
        targ_elems = torch.DoubleTensor(targets).to(device)

        optimizer.zero_grad()

        # rev_indices records where each sequence sat before sorting, so the
        # original order can be restored inside forward (toy example below).
        d1 = {elem: i for i, elem in enumerate(indices[0])}
        d2 = {elem: i for i, elem in enumerate(indices[1])}
        rev_indices = np.stack(([d1[k] for k in range(inp.shape[1])],
                                [d2[k] for k in range(inp.shape[1])]))
        rev_indices = torch.LongTensor(rev_indices.T).to(device)
        seq_lens = torch.LongTensor(seq_lens.copy().T).to(device)

        outputs = model(inp_elems, seq_lens, rev_indices)
        loss = F.mse_loss(outputs, targ_elems)
        loss.backward()
        optimizer.step()
```
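
To make the `rev_indices` bookkeeping concrete, here is a toy version of the same sort-then-restore logic with hypothetical lengths:

```
import numpy as np

nonzero = np.array([2, 5, 3])              # length of each sequence
indices = np.flip(np.argsort(nonzero))     # [1, 2, 0]: descending-length order
d = {elem: i for i, elem in enumerate(indices)}
rev = [d[k] for k in range(len(nonzero))]  # [2, 0, 1]: position of each original item
sorted_lens = nonzero[indices]             # [5, 3, 2]
assert (sorted_lens[rev] == nonzero).all() # indexing with rev restores the order
```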

Any help would be appreciated!

Thanks