Runtime error: scatter function fails to provide arguments to both GPUs

import torch
import torchvision
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd 
from torch.distributions import Normal
import torchvision.transforms as transforms


torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True) 
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True
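
# Note: logit, sigmoid, gumbel_softmax and the OutLayer class are used below but are
# defined elsewhere in the full script; they are omitted from this excerpt.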

# Use standard FashionMNIST dataset
train_set = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train = True,
    download = True,
    transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()          
    ])
)

test_set = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train = False,
    download = True,
    transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()          
    ])
)

class MLPLayer(nn.Module):
    """
        Hidden Layer of our BNN
    """
    def __init__(self, input_dim, output_dim, rho_prior, rho0=-6., lambda0=0.99):
        # initialize layers
        super().__init__()
        # set input and output dimensions
        self.input_dim = input_dim
        self.output_dim = output_dim

        # initialize mu, rho and theta parameters for layer's weights
        self.w_mu = nn.Parameter(torch.Tensor(input_dim, output_dim).uniform_(-0.6, 0.6))
        self.w_rho = nn.Parameter(torch.Tensor(input_dim, output_dim).uniform_(rho0, rho0))
        self.theta = nn.Parameter(logit(torch.Tensor(output_dim).uniform_(lambda0, lambda0)))
        # initialize mu, rho and theta parameters for layer's biases, theta = logit(phi)
        self.b_mu = nn.Parameter(torch.Tensor(output_dim).uniform_(-0.6, 0.6))
        self.b_rho = nn.Parameter(torch.Tensor(output_dim).uniform_(rho0, rho0))

        self.rho_prior = rho_prior
        # self.device = device

        # initialize weight samples (these will be calculated whenever the layer makes a prediction)
        self.gamma = None

        self.w = None
        self.b = None

        # initialize log pdf of prior and vb distributions
        self.kl = 0

    def forward(self, X, temp, phi_prior):
        """
            For one Monte Carlo sample
            :param X: [batch_size, input_dim]
            :return: output for one MC sample, size = [batch_size, output_dim]
        """
        # sample weights and biases
        sigma_w = torch.log(1 + torch.exp(self.w_rho))
        sigma_b = torch.log(1 + torch.exp(self.b_rho))
        sigma_prior = torch.log(1 + torch.exp(self.rho_prior))

        self.register_buffer('u', torch.rand(self.theta.shape))
        u = self.u #to(self.device)
        self.gamma = gumbel_softmax(self.theta, u, temp, hard=True)

        self.gamma_w = self.gamma.expand(self.input_dim, self.output_dim)
        self.gamma_b = self.gamma
        
        # epsilon_w = Normal(0, 1).sample(self.w_mu.shape)
        # epsilon_b = Normal(0, 1).sample(self.b_mu.shape)
        self.register_buffer('epsilon_w', Normal(0, 1).sample(self.w_mu.shape))
        self.register_buffer('epsilon_b', Normal(0, 1).sample(self.b_mu.shape))
        epsilon_w = self.epsilon_w #to(self.device)
        epsilon_b = self.epsilon_b #to(self.device)
        
        self.w = self.gamma_w * (self.w_mu + sigma_w * epsilon_w)
        self.b = self.gamma_b * (self.b_mu + sigma_b * epsilon_b)
        output = torch.mm(X, self.w) + self.b.expand(X.size()[0], self.output_dim)

        # record KL at sampled weight and bias
        phi = sigmoid(self.theta)
        w_phi = phi.expand(self.input_dim, self.output_dim)
        b_phi = phi

        kl_phi = phi * (torch.log(phi) - torch.log(phi_prior)) + \
                 (1 - phi) * (torch.log(1 - phi) - torch.log(1 - phi_prior))
        
        kl_w = w_phi * (torch.log(sigma_prior) - torch.log(sigma_w) +
                        0.5 * (sigma_w ** 2 + self.w_mu ** 2) / sigma_prior ** 2 - 0.5)
        
        kl_b = b_phi * (torch.log(sigma_prior) - torch.log(sigma_b) +
                        0.5 * (sigma_b ** 2 + self.b_mu ** 2) / sigma_prior ** 2 - 0.5)

        self.kl = torch.sum(kl_w) + torch.sum(kl_b) + torch.sum(kl_phi)

        return output

class SFunc(nn.Module):
    def __init__(self, data_dim, hidden_dim1, hidden_dim2, target_dim, temp, phi_prior1, phi_prior2, builder, sigma_noise=1):

        # initialize the network using the MLP layer
        super().__init__()
        # self.rho_prior = torch.Tensor([np.log(np.exp(1.3) - 1)]) #.to(device)
        self.register_buffer('rho_prior', torch.Tensor([np.log(np.exp(1.3) - 1)]))
        # self.device = device
        
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=2)
        self.l1 = MLPLayer(data_dim, hidden_dim1, self.rho_prior) # , self.device)
        self.l2 = MLPLayer(hidden_dim1, hidden_dim2, self.rho_prior) #, self.device)
        self.l4 = OutLayer(hidden_dim2, target_dim, self.rho_prior) #, self.device)

        self.target_dim = target_dim
        # self.log_sigma_noise = torch.log(torch.Tensor([sigma_noise])) #.to(device)
        self.register_buffer('log_sigma_noise', torch.log(torch.Tensor([sigma_noise])))
        self.temp = temp
        self.phi_prior1 = phi_prior1
        self.phi_prior2 = phi_prior2
        self.train_len = torch.tensor(len(builder))

    def forward(self, X, y, temp, phi_prior1, phi_prior2):
        """
            output of the BNN for one Monte Carlo sample
            :param X: [batch_size, data_dim]
            :return: [batch_size, target_dim]
        """
        print("\tIn Model: input size", X.size())
        output = F.relu(F.max_pool2d(self.conv1(X), 2))
        output = F.relu(F.max_pool2d(self.conv2(output), 2))
        output = F.relu(self.l1(output.reshape(-1, 64*8*8), temp, phi_prior1))
        output = F.relu(self.l2(output, temp, phi_prior2))
        output = self.l4(output)
        #loss function here

        return output.squeeze()

data_size = 60000
data_dim = 64*8*8
hidden_dim1 = 64*1*1
hidden_dim2 = 64*1*1
target_dim = 10
L=2
temp = torch.tensor(0.5)
phi_prior1 = torch.tensor(0.0001)
phi_prior2 = torch.tensor(0.0001)
lr = .001
batch_size = 1024
epochs = 1

torch.set_default_tensor_type('torch.cuda.FloatTensor')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0)

net = SFunc(data_dim, hidden_dim1, hidden_dim2, target_dim, temp, phi_prior1, phi_prior2, train_loader)
if torch.cuda.device_count() > 1:
    print("There are", torch.cuda.device_count(), "GPUs!")
    net = torch.nn.DataParallel(net, device_ids=[0, 1])
net.to(device)

optimizer = torch.optim.Adam(net.parameters(), lr=lr)

for epoch in range(epochs):
    for batch in train_loader:
        images, labels = batch[0].to(device), batch[1].to(device)
        print("Outside: input size", images.size())
        preds = net(images, labels, temp, phi_prior1, phi_prior2)
        optimizer.zero_grad()
        # loss.backward()
        optimizer.step()
    
    print("\n")
    total = 0
    correct = 0
    labels_list = []
    predictions_list = []
    with torch.no_grad():
        for batch in test_loader:
            images, labels = batch[0].to(device), batch[1].to(device)
            labels_list.append(labels)
            print("Outside: input size", images.size())

            outputs = net(images, labels, temp, phi_prior1, phi_prior2)

            preds2 = torch.max(outputs, 1)[1]
            predictions_list.append(preds2)
            correct += (preds2 == labels).sum()

            total += labels.size(0)
        
    test_accuracy = correct/total

When I run this code, I get the following error:

Traceback (most recent call last):
  File "SSIG_Fashion-MNIST-HPCC_New.py", line 488, in <module>
    preds = net(images, labels,temp.to(device),phi_prior1.to(device),phi_prior2.to(device))
  File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/data_parallel.py", line 157, in forward
    inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
  File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/data_parallel.py", line 174, in scatter
    return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
  File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/scatter_gather.py", line 44, in scatter_kwargs
    inputs = scatter(inputs, target_gpus, dim) if inputs else []
  File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/scatter_gather.py", line 36, in scatter
    res = scatter_map(inputs)
  File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/scatter_gather.py", line 23, in scatter_map
    return list(zip(*map(scatter_map, obj)))
  File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/scatter_gather.py", line 19, in scatter_map
    return Scatter.apply(target_gpus, None, dim, obj)
  File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/_functions.py", line 93, in forward
    outputs = comm.scatter(input, target_gpus, chunk_sizes, ctx.dim, streams)
  File "/mnt/home/jantresa/anaconda3/envs/test1/lib/python3.8/site-packages/torch/nn/parallel/comm.py", line 189, in scatter
    return tuple(torch._C._scatter(tensor, devices, chunk_sizes, dim, streams))
RuntimeError: chunk expects at least a 1-dimensional tensor

I do not understand why I get this error when I pass simple tensor arguments to the DataParallel-wrapped model.
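
Here is my attempt at a minimal reproduction of what the scatter step seems to be doing internally (my assumption): DataParallel tries to split every positional tensor argument along dim 0, one chunk per GPU, and torch.chunk refuses to split a 0-dimensional tensor such as temp = torch.tensor(0.5) above.

import torch

temp = torch.tensor(0.5)     # 0-dim tensor, same as in the script above
print(temp.dim())            # prints 0

# DataParallel's scatter chunks each tensor argument along dim 0 (one piece per GPU);
# on a 0-dim tensor this raises the same error as in the traceback:
torch.chunk(temp, 2, dim=0)  # RuntimeError: chunk expects at least a 1-dimensional tensor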

@VitalyFedyunin do you mind taking a look at Sanket’s problem?

I have resolved the issue.
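
For anyone landing here with the same traceback, here is a sketch of one workaround that avoids this particular scatter failure (an assumption on my part, not necessarily the exact change that resolved it): register the 0-dim scalars as buffers on SFunc, the same way rho_prior and log_sigma_noise already are, and read them from self inside forward instead of passing them as positional arguments. DataParallel replicates module buffers onto each GPU, so scatter never has to chunk them.

# Sketch of a possible workaround (assumption, not necessarily the fix that was applied):

# In SFunc.__init__, replacing self.temp = temp etc.:
self.register_buffer('temp', temp)              # replicated to each GPU by DataParallel
self.register_buffer('phi_prior1', phi_prior1)
self.register_buffer('phi_prior2', phi_prior2)

# In SFunc.forward, drop temp/phi_prior1/phi_prior2 from the signature and use the buffers:
output = F.relu(self.l1(output.reshape(-1, 64*8*8), self.temp, self.phi_prior1))
output = F.relu(self.l2(output, self.temp, self.phi_prior2))

# In the training loop, only the batch tensors are scattered across the GPUs:
preds = net(images, labels)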