When indices and offsets are both 2D,
CPU:
Could you post a minimal, executable code snippet, wrapped in three backticks ```, so that we can try to reproduce and debug the issue?
import torch
# Reproduction script: call the raw aten embedding_bag op with 2D `indices`
# and 2D `offset`, then print the inputs and whatever the op returns.

# Use CUDA when it is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"device: {device}")

# 5 x 5 float embedding table.
weight = torch.tensor(
    [
        [1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2],
        [1, 2, 3, 4, 5],
        [1, 2, 3, 4, 5],
        [1, 2, 3, 4, 5],
    ],
    dtype=torch.float,
    device=device,
)
# 2 x 4 int32 lookup indices into `weight`.
indices = torch.tensor(
    [[1, 2, 3, 4], [1, 2, 3, 4]],
    dtype=torch.int32,
    device=device,
)
# 2 x 2 offsets, left at the default integer dtype.
# NOTE(review): the public embedding_bag documentation appears to describe
# offsets as a 1D tensor used only with 1D input; passing a 2D tensor here is
# the case this script is probing -- confirm the expected behaviour.
offset = torch.tensor([[0, 2], [2, 3]], device=device)

# Remaining op arguments, forwarded unchanged as keywords below.
scale_grad_by_freq = False
per_sample_weights = None
sparse = False
include_last_offset = False
pad_idx = None
# sum=0, mean=1, max=2
mode = 0

print("==================== mode=sum:")
for caption, value in (
    ("weight \n", weight),
    ("indices \n", indices),
    ("offset \n", offset),
):
    print(caption, value)

op_kwargs = {
    "weight": weight,
    "indices": indices,
    "offsets": offset,
    "scale_grad_by_freq": scale_grad_by_freq,
    "mode": mode,
    "sparse": sparse,
    "per_sample_weights": per_sample_weights,
    "include_last_offset": include_last_offset,
    "padding_idx": pad_idx,
}
res1 = torch.ops.aten.embedding_bag(**op_kwargs)
print("res1 \n", res1)
Hello, could you help check the code? Thanks.