Batchnorm1d can't run on GPU

I am using batchnorm1d() in my networks. If I remove the batchnorm1d() from the configuration the network behaves fine on GPU. When I add it back I get the following error:

File "/mnt/mscteach_home/s1877727/recommendations/spotlight/dnn_models/", line 115, in forward
    vector = layers(vector)
  File "/home/s1877727/miniconda3/envs/mlp/lib/python3.7/site-packages/torch/nn/modules/", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/s1877727/miniconda3/envs/mlp/lib/python3.7/site-packages/torch/nn/modules/", line 83, in forward
    exponential_average_factor, self.eps)
  File "/home/s1877727/miniconda3/envs/mlp/lib/python3.7/site-packages/torch/nn/", line 1697, in batch_norm
    training, momentum, eps, torch.backends.cudnn.enabled
RuntimeError: Tensor for argument #4 'running_mean' is on CPU, but expected it to be on GPU (while checking arguments for cudnn_batch_norm)

Here is the network architecture:

class MLP(nn.Module):
    def __init__(self,layers,num_users, num_items,output_dim = 1, embedding_dim=32):
        super(MLP, self).__init__()

        self.num_users = num_users
        self.num_items = num_items
        self.latent_dim = embedding_dim

        self.embedding_user = torch.nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.latent_dim)
        self.embedding_item = torch.nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.latent_dim)

        self.layers = []
        self.layerDims = layers.copy()
        for idx in range(len(self.layerDims)-1):
            self.layers.append(nn.Linear(self.layerDims[idx], self.layerDims[idx+1]))
        list_param = []
        for a in self.layers:

        self.fc_layers = nn.ParameterList(list_param)

        self.logistic = torch.nn.Sigmoid()

    def forward(self, user_indices, item_indices):

        user_embedding = self.embedding_user(user_indices)
        item_embedding = self.embedding_item(item_indices)

        vector =[user_embedding, item_embedding], dim=-1)  # the concat latent vector

        for layers in self.layers[:-1]:
            vector = layers(vector)
            vector = nn.functional.relu(vector)
        logits = self.layers[-1](vector)
        rating = self.logistic(logits)
        return rating

    def init_weights(self,m):
        if type(m) == nn.Linear:


the problem is that you store your layers in a regular list, not an nn.ModuleList and so it is not affected by your call to model.cuda().

Thanks a lot, it works fine that’s was the issue.