Manipulating the .data attribute is deprecated and can yield unwanted side effects.
Copy the values into the weight parameter (inside a torch.no_grad() block) and load the state_dict instead:

import torch
import numpy as np
np.random.seed(1)
# initialize input and embedding weight
max_norm = 1
x_np = np.random.randint(0, 1000, (128, 20))
weight_np = np.random.randn(1000, 100).astype(np.float32)
# cpu
m_cpu = torch.nn.Embedding(1000, 100, max_norm=max_norm)
with torch.no_grad():
    m_cpu.weight.copy_(torch.tensor(weight_np))
y_cpu = m_cpu(torch.tensor(x_np))
# gpu
m_cuda = torch.nn.Embedding(1000, 100, max_norm=max_norm)  # created on CPU first; .cuda() is called after load_state_dict below
m_cuda.load_state_dict(m_cpu.state_dict())
m_cuda.cuda()
y_cuda = m_cuda(torch.tensor(x_np).cuda())
# normalize weight manually
indices = np.unique(x_np)
norm = np.linalg.norm(weight_np[indices], ord=2, axis=-1, keepdims=True) # ord = 2 is the default of nn.Embedding
rescale = (norm > max_norm)[:, 0]
selection = np.zeros(weight_np.shape[0], dtype=bool)
selection[indices] = rescale
weight_np[selection] *= max_norm / (norm[rescale] + 1e-7)
# max diff
print(torch.abs(m_cpu.weight - m_cuda.weight.cpu()).max()) # max diff between weight renormalized in gpu and cpu
print(torch.abs(m_cpu.weight - torch.tensor(weight_np)).max())

with torch.no_grad():
    m.weight.copy_(torch.tensor(weight_np))

but the result is still wrong, just as before.
Are you suggesting that, to replace the weight, I need to create another nn.Embedding instance, replace its weight, then load its state_dict?
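I.e., something like this? (a minimal sketch of the workflow as I understand it; m_src is just a name I am using here for the second instance, and weight_np is the array from the snippet above)

# sketch of the suggested workflow (names here are mine, not from your code):
# create a second Embedding, write the new values into its weight under no_grad,
# then load its state_dict into the module that is actually used
m = torch.nn.Embedding(1000, 100, max_norm=1)       # the module I actually use
m_src = torch.nn.Embedding(1000, 100, max_norm=1)   # temporary module holding the new weights
with torch.no_grad():
    m_src.weight.copy_(torch.tensor(weight_np))
m.load_state_dict(m_src.state_dict())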