Tensor for argument #2 'weight' is on CPU, but expected it to be on GPU (while checking arguments for cudnn_batch_norm)

Hi! I just started training my model with PyTorch. However, I ran into a problem when training the model on a GPU. The error message is shown below.
RuntimeError: Tensor for argument #2 'weight' is on CPU, but expected it to be on GPU (while checking arguments for cudnn_batch_norm)
And this is the code of my model:
class BN(nn.Module):
def __init__(self, input):
    super(BN, self).__init__()
    self.input = input
    self.bn = nn.BatchNorm1d(input.size()[1], momentum=0.5)
    self.bn.cuda()

def forward(self, x):
    x = self.bn(x.float())
    x = torch.as_tensor(x).long()
    return x

class DRLSTM(nn.Module):
def __init__(self,
             vocab_size,
             embedding_dim,
             hidden_size,
             embeddings=None,
             padding_idx=0,
             dropout=0.5,
             num_classes=3,
             device="cpu"):

    super(DRLSTM, self).__init__()

    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.hidden_size = hidden_size
    self.num_classes = num_classes
    self.dropout = dropout
    self.device = device
    self.bn = nn.BatchNorm1d(64, momentum=0.8).to(self.device)


    self.debug = False

    self._word_embedding = nn.Embedding(self.vocab_size,
                                        self.embedding_dim,
                                        padding_idx=padding_idx,
                                        _weight=embeddings)
    # print ('embedding_dim: ')
    # print (embedding_dim)
    if self.dropout:
        self._rnn_dropout = RNNDropout(p=self.dropout)
        # self._rnn_dropout = nn.Dropout(p=self.dropout)

    self._encoding = Seq2SeqEncoder(nn.LSTM,
                                    self.embedding_dim,
                                    self.hidden_size,
                                    bidirectional=True)

    # self._encoding1 = Seq2SeqEncoder(nn.LSTM,
    #                                 self.embedding_dim,
    #                                 int(self.hidden_size/2),
    #                                 bidirectional=True)
    # self._encoding2 = Seq2SeqEncoder(nn.LSTM,
    #                                 self.embedding_dim,
    #                                 #self.hidden_size,
    #                                 self.hidden_size,
    #                                 bidirectional=True)

    self._attention = SoftmaxAttention()

    self._projection = nn.Sequential(nn.Linear(4 * 2 * self.hidden_size,
                                               self.hidden_size),
                                     nn.ReLU()
                                     )

    self._composition = Seq2SeqEncoder(nn.LSTM,
                                       self.hidden_size,
                                       self.hidden_size,
                                       bidirectional=True)

    self._composition1 = Seq2SeqEncoder(nn.LSTM,
                                        self.hidden_size,
                                        self.hidden_size,
                                        bidirectional=True)

    self._composition2 = Seq2SeqEncoder(nn.LSTM,
                                        2 * self.hidden_size,
                                        self.hidden_size,
                                        bidirectional=True)

    self._classification = nn.Sequential(nn.Dropout(p=self.dropout),
                                         nn.Linear(2 * 4 * self.hidden_size,
                                                   self.hidden_size),
                                         nn.Tanh(),

                                         nn.Dropout(p=self.dropout),
                                         nn.Linear(self.hidden_size,
                                                   self.num_classes))

    # Initialize all weights and biases in the model.
    self.apply(_init_model_weights)

def forward(self,
            premises,
            premises_lengths,
            hypotheses,
            hypotheses_lengths):

    premises_mask = get_mask(premises, premises_lengths).to(self.device)
    hypotheses_mask = get_mask(hypotheses, hypotheses_lengths) \
        .to(self.device)


    # BN1 = nn.BatchNorm1d(premises.size()[1], momentum=0.5).to(self.device)
    # BN2 = nn.BatchNorm1d(hypotheses.size()[1], momentum=0.5).to(self.device)
    # premises = BN1(premises.float())
    # hypotheses = BN2(hypotheses.float())
    # premises = self.bn(premises.float())
    # hypotheses = self.bn(hypotheses.float())
    # premises = torch.as_tensor(premises).long()
    # hypotheses = torch.as_tensor(hypotheses).long()
    bn1 = BN(premises)
    premises = bn1(premises)
    bn2 = BN(hypotheses)
    hypotheses = bn2(hypotheses)

I wonder why this error occurs.
Thank you!

In DRLSTM you are using device='cpu' as the default argument value and then applying it to self.bn, which pushes this layer to the CPU. If you then pass a CUDA tensor to this module, this error will be raised.
The better approach is to not specify the device as an argument and to just call model.to(device) after creating the object.
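A minimal sketch of that approach (the constructor arguments below are placeholders, not values from the original code):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = DRLSTM(vocab_size=30000, embedding_dim=300, hidden_size=128)  # no device argument
model = model.to(device)  # moves all registered parameters and buffers, incl. BatchNorm, to the GPU

# the inputs have to live on the same device
premises = premises.to(device)
hypotheses = hypotheses.to(device)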

PS: you can post code snippets by wrapping them into three backticks ```, which makes debugging easier. :wink:

Hi, I am building a kind of generator network and I am getting the same error. I am new to the PyTorch framework.
The error is shown below:

/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
   2056     return torch.batch_norm(
   2057         input, weight, bias, running_mean, running_var,
-> 2058         training, momentum, eps, torch.backends.cudnn.enabled
   2059     )
   2060 

RuntimeError: Tensor for argument #2 'weight' is on CPU, but expected it to be on GPU (while checking arguments for cudnn_batch_norm)
class netG(nn.Module):
  def __init__(self,channel_rate=64,drop_rate=0.0):
    super(netG, self).__init__()
    self.channel_rate = channel_rate
    self.nChannels = 4*self.channel_rate
    self.drop_rate = drop_rate
    self.dilation_factor = 1
    self.head = nn.Conv2d(3,4*self.channel_rate,3,1,1)
  
  def dense_block(self,x):
    block = nn.Sequential(
    nn.BatchNorm2d(self.nChannels),
    nn.LeakyReLU(0.2), 
    nn.Conv2d(self.nChannels,4*self.channel_rate,1,1),
    nn.BatchNorm2d(4*self.channel_rate),
    nn.Conv2d(4*self.channel_rate,self.channel_rate,3,1,self.dilation_factor,self.dilation_factor),
    nn.BatchNorm2d(self.channel_rate),
    nn.Dropout2d(self.drop_rate)
    )
    return block(x)


  def tail(self,x):
    block = nn.Sequential(
    nn.LeakyReLU(0.2),
    nn.Conv2d(self.nChannels,4*self.channel_rate,1,1),
    nn.BatchNorm2d(4*self.channel_rate),
    nn.Dropout(self.drop_rate)
    )
    return block(x)
    
  def last_layer(self,x):
    block= nn.Sequential(
    nn.Conv2d(self.nChannels,self.channel_rate,3,1,1),
    nn.PReLU(),
    nn.Conv2d(self.channel_rate,3,3,1,1),
    nn.Tanh()
    )
    return block(x)

  def forward(self,x):
    x = self.head(x)
    x1 = x
    self.nChannels = x.size(1)
    self.dilation_factor = 1
    d1 = self.dense_block(x)
    x = torch.cat((x,d1),1)
    self.dilation_factor = 1
    self.nChannels = x.size(1)
    d2 = self.dense_block(x)
    x = torch.cat((x,d2),1)
    self.dilation_factor = 2
    self.nChannels  = x.size(1)
    d4 = self.dense_block(x)
    x = torch.cat((x,d4),1)
    self.dilation_factor = 1
    self.nChannels  = x.size(1)
    d5 = self.dense_block(x)
    x = torch.cat((x,d5),1)
    self.dilation_factor = 3
    self.nChannels  = x.size(1)
    d6 = self.dense_block(x)
    x = torch.cat((x,d6),1)
    self.dilation_factor = 1
    self.nChannels  = x.size(1)
    d7 = self.dense_block(x)
    x = torch.cat((x,d7),1)
    self.dilation_factor = 2
    self.nChannels  = x.size(1)
    d8 = self.dense_block(x)
    x = torch.cat((x,d8),1)
    self.dilation_factor = 1
    self.nChannels  = x.size(1)
    d9 = self.dense_block(x)
    x = torch.cat((x,d9),1)
    self.dilation_factor = 1
    self.nChannels  = x.size(1)
    d10 = self.dense_block(x)
    x = self.tail(x)
    x = torch.cat((x,x1),1)
    self.nChannels  = x.size(1)
    x = self.last_layer(x)
    return x

You are recreating modules inside the forward method of netG. They won't be registered during model creation and will be reinitialized with random values in each forward pass, which is most likely not what you want.
Create all modules in netG.__init__ and use them in netG.forward as:

class netG(nn.Module):
  def __init__(self,channel_rate=64,drop_rate=0.0):
    super(netG, self).__init__()
    self.channel_rate = channel_rate
    self.nChannels = 4*self.channel_rate
    self.drop_rate = drop_rate
    self.dilation_factor = 1
    self.head = nn.Conv2d(3,4*self.channel_rate,3,1,1)
    self.dense_block = nn.Sequential(...)
    self.tail = nn.Sequential(...)
    self.last_layer = nn.Sequential(...)

  def forward(self, x):
    x = self.head(x)
    ...
    x = self.dense_block(x)
    ...

I tried that earlier, but the problem is that I want to use different nChannels and dilation factors. If I create the modules inside __init__, they will be initialized once with the same initial nChannels and dilation factor, which I don't want. Can you suggest a solution?

class netG(nn.Module):
  def __init__(self,channel_rate=64,drop_rate=0.0):
    super(netG, self).__init__()
    self.channel_rate = channel_rate
    self.nChannels = 4*self.channel_rate
    self.drop_rate = drop_rate
    self.head = nn.Conv2d(3,4*self.channel_rate,3,1,1)
    self.dilation_factor = 1
  
    self.dense_block = nn.Sequential(
      nn.BatchNorm2d(self.nChannels),
      nn.LeakyReLU(0.2), 
      nn.Conv2d(self.nChannels,4*self.channel_rate,1,1),
      nn.BatchNorm2d(4*self.channel_rate),
      nn.Conv2d(4*self.channel_rate,self.channel_rate,3,1,self.dilation_factor,self.dilation_factor),
      nn.BatchNorm2d(self.channel_rate),
      nn.Dropout2d(self.drop_rate)
    )

    self.tail = nn.Sequential(
      nn.LeakyReLU(0.2),
      nn.Conv2d(self.nChannels,4*self.channel_rate,1,1),
      nn.BatchNorm2d(4*self.channel_rate),
      nn.Dropout(self.drop_rate)
    )
    
    self.last_layer = nn.Sequential(
      nn.Conv2d(self.nChannels,self.channel_rate,3,1,1),
      nn.PReLU(),
      nn.Conv2d(self.channel_rate,3,3,1,1),
      nn.Tanh()
    )

  def forward(self,x):
    x = self.head(x)
    x1 = x
    self.nChannels = x.size(1)
    self.dilation_factor = 1
    d1 = self.dense_block(x)
    x = torch.cat((x,d1),1)
    self.dilation_factor = 1
    self.nChannels = x.size(1)
    d2 = self.dense_block(x)
    x = torch.cat((x,d2),1)
    self.dilation_factor = 2
    self.nChannels  = x.size(1)
    d4 = self.dense_block(x)
    x = torch.cat((x,d4),1)
    self.dilation_factor = 1
    self.nChannels  = x.size(1)
    d5 = self.dense_block(x)
    x = torch.cat((x,d5),1)
    self.dilation_factor = 3
    self.nChannels  = x.size(1)
    d6 = self.dense_block(x)
    x = torch.cat((x,d6),1)
    self.dilation_factor = 1
    self.nChannels  = x.size(1)
    d7 = self.dense_block(x)
    x = torch.cat((x,d7),1)
    self.dilation_factor = 2
    self.nChannels  = x.size(1)
    d8 = self.dense_block(x)
    x = torch.cat((x,d8),1)
    self.dilation_factor = 1
    self.nChannels  = x.size(1)
    d9 = self.dense_block(x)
    x = torch.cat((x,d9),1)
    self.dilation_factor = 1
    self.nChannels  = x.size(1)
    d10 = self.dense_block(x)
    x = self.tail(x)
    x = torch.cat((x,x1),1)
    self.nChannels  = x.size(1)
    x = self.last_layer(x)
    return x

You can change the dilation by directly accessing the attribute:

conv = nn.Conv2d(1, 1, 3, dilation=1)
x = torch.randn(1, 1, 24, 24)
out = conv(x)
print(out.shape)
> torch.Size([1, 1, 22, 22])

conv.dilation = (2, 2)
out = conv(x)
print(out.shape)
> torch.Size([1, 1, 20, 20])

However, changing the number of output channels would change the weight parameter, since you would be adding new kernels or would remove some, so how should this be performed?
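For illustration (a small standalone example, not from the original post): the weight of an nn.Conv2d has the shape (out_channels, in_channels, kernel_height, kernel_width), so changing the channel configuration changes the parameter tensor itself rather than just an attribute:

conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)
print(conv.weight.shape)  # torch.Size([1, 1, 3, 3])

conv = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3)
print(conv.weight.shape)  # torch.Size([4, 1, 3, 3]) -> a different parameter, not just a new setting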

I didn't know that, thanks. But the problem remains: I have built the blocks and want to access specific layers, Conv2d and BatchNorm, whose dilation and number of features keep changing. Can I access them by making a new nn.Module subclass for each block (head, dense, tail), like multiple inheritance? But then it would be the same problem again, since the nn.Module would be created over and over and initialized on the CPU automatically.
Can you suggest any method?

If your use case dictates changing the filters frequently (e.g. by adding more filters or removing some), I would use the functional API via F.conv2d and define the weight and bias tensors manually.
This would give you more flexibility than trying to manipulate the parameters inside the nn.Conv2d module.
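A minimal sketch of what that could look like (the class name, shapes, and init values here are made up for illustration): register the weight and bias once as nn.Parameters in __init__, so they are part of model.parameters() and move with model.to(device), and pass a different dilation to F.conv2d in forward:

import torch
import torch.nn as nn
import torch.nn.functional as F

class FunctionalConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        # assigned as attributes, so they are registered and show up in model.parameters()
        self.weight = nn.Parameter(torch.randn(out_channels, in_channels, 3, 3) * 1e-4)
        self.bias = nn.Parameter(torch.zeros(out_channels))

    def forward(self, x, dilation=1):
        # the same weight is reused; only the dilation (and matching padding) changes per call
        return F.conv2d(x, self.weight, self.bias, padding=dilation, dilation=dilation)

block = FunctionalConvBlock(3, 8)
x = torch.randn(1, 3, 24, 24)
out1 = block(x, dilation=1)
out2 = block(x, dilation=2)  # same spatial size because padding matches the dilation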

I have made a few changes; now I am getting a CUDA memory allocation error.

class netG(nn.Module):
  def __init__(self,nChannels=256,channel_rate=64,drop_rate=0.0,dilation=1):
    super(netG, self).__init__()
    self.channel_rate = channel_rate
    self.nChannels = 4*self.channel_rate
    self.drop_rate = drop_rate
    self.dilation = 1
    self.head = nn.Conv2d(3,4*self.channel_rate,3,1,1)
  
  def dense_block(self,x):
    bn1 = nn.BatchNorm2d(self.nChannels),
    act = nn.LeakyReLU(0.2), 
    conv1 = nn.Conv2d(self.nChannels,4*self.channel_rate,1,1),
    bn2 = nn.BatchNorm2d(4*self.channel_rate),
    conv2 = nn.Conv2d(4*self.channel_rate,self.channel_rate,3,1,self.dilation,self.dilation),
    bn3 = nn.BatchNorm2d(self.channel_rate),
    dropout = nn.Dropout2d(self.drop_rate)
    def forward(x):
      x = bn1(x)
      x = act(x)
      x = conv1(x)
      x = bn2(x)
      x = conv2(x)
      x = bn3(x)
      x = dropout(x)
    return x

  def tail(self,x):
    bn1 = nn.BatchNorm2d(self.nChannels)
    act = nn.LeakyReLU(0.2)
    conv1 = nn.Conv2d(self.nChannels,4*self.channel_rate,1,1)
    bn2 = nn.BatchNorm2d(4*self.channel_rate)
    dropout = nn.Dropout(self.drop_rate)
    def forward(x):
      x = bn1(x)
      x = act(x)
      x = conv1(x)
      x = bn2(x)
      x = dropout(x)
    return x
    
  def last_layer(self,x):
    conv1 = nn.Conv2d(self.nChannels,self.channel_rate,3,1,1),
    act = nn.PReLU(),
    conv2 = nn.Conv2d(self.channel_rate,3,3,1,1),
    act1 = nn.Tanh()
    def forward(x):
      x = conv1(x)
      x = act(x)
      x = conv2(x)
      x = act1(x)
    return x
    

  def forward(self,x):
    x = self.head(x)
    x1 = x

    self.nChannels = x.size(1)
    self.dilation = 1
    d1 = self.dense_block(x)
    x = torch.cat((x,d1),1)
    
    self.nChannels = x.size(1)
    self.dilation = 1
    d2 = self.dense_block(x)
    x = torch.cat((x,d2),1)
    
    self.nChannels = x.size(1)
    self.dilation = 2
    d3 = self.dense_block(x)
    x = torch.cat((x,d3),1)
   
    self.nChannels = x.size(1)
    self.dilation = 1
    d4 = self.dense_block(x)
    x = torch.cat((x,d4),1)
    
    self.nChannels = x.size(1)
    self.dilation = 3
    d5 = self.dense_block(x)
    x = torch.cat((x,d5),1)
    
    self.nChannels = x.size(1)
    self.dilation = 3
    d6 = self.dense_block(x)
    x = torch.cat((x,d6),1)
    
    self.nChannels = x.size(1)
    self.dilation = 1
    d7 = self.dense_block(x)
    x = torch.cat((x,d7),1)
    
    self.nChannels = x.size(1)
    self.dilation = 2
    d8 = self.dense_block(x)
    x = torch.cat((x,d8),1)

    self.nChannels = x.size(1)
    self.dilation = 1
    d9 = self.dense_block(x)
    x = torch.cat((x,d9),1)
    
    self.nChannels = x.size(1)
    self.dilation = 1
    d10 = self.dense_block(x)

    self.nChannels = x.size(1)
    x = self.tail(d10)

    x = torch.cat((x,x1),1)

    self.nChannels = x.size(1)
    x = last_layer(x)

    return x
     88     self.dilation = 3
     89     d6 = self.dense_block(x)
---> 90     x = torch.cat((x,d6),1)
     91 
     92     self.nChannels = x.size(1)

RuntimeError: CUDA out of memory. Tried to allocate 12.00 GiB (GPU 0; 15.90 GiB total capacity; 11.82 GiB already allocated; 3.24 GiB free; 11.85 GiB reserved in total by PyTorch)

You are running out of memory in the torch.cat operation, so try to e.g. reduce the batch size.

Even with a batch size of 1 (a single sample) the error is still there.

     85     self.dilation = 3
     86     d6 = self.dense_block(x)
---> 87     x = torch.cat((x,d6),1)
     88 
     89     self.nChannels = x.size(1)

RuntimeError: CUDA out of memory. Tried to allocate 4.00 GiB (GPU 0; 7.43 GiB total capacity; 3.94 GiB already allocated; 2.86 GiB free; 3.94 GiB reserved in total by PyTorch)

How much memory does your GPU have? Your model might just be too big. You could then either reduce the spatial input shape (if possible), slim down the model, or use torch.utils.checkpoint to trade compute for memory.
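A minimal, self-contained sketch of the checkpoint idea (the block and shapes are made up for illustration, and a CUDA device is assumed):

import torch
from torch.utils.checkpoint import checkpoint

# a hypothetical memory-hungry sub-module
block = torch.nn.Sequential(
    torch.nn.Conv2d(64, 64, 3, padding=1),
    torch.nn.ReLU(),
    torch.nn.Conv2d(64, 64, 3, padding=1),
).cuda()

x = torch.randn(1, 64, 256, 256, device="cuda", requires_grad=True)
# the intermediate activations of `block` are not stored during the forward
# pass; they are recomputed in the backward pass, trading compute for memory
out = checkpoint(block, x)
out.mean().backward()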

I think there is no way to manipulate the modules directly in the way I want, so I should go with the functional-API-based approach instead. Thanks.

I am using Google Colab.

Have a look at this:

class netG(nn.Module):
  def __init__(self,nChannels=256,channel_rate=64,drop_rate=0.0,dilation=1):
    super(netG, self).__init__()
    self.channel_rate = channel_rate
    self.nChannels = 4*self.channel_rate
    self.drop_rate = drop_rate
    self.dilation = 1
 
  def forward(self,x):
    #head
    x = F.conv2d(x, nn.Parameter(torch.Tensor(3, 4*self.channel_rate, 3, 3).normal_(0,0.0001)),bias=False)
    x1 = x
    #dense_block-1
    self.nChannels = x.size(1)
    self.dilation = 1
    d = F.batch_norm(x,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.leaky_relu(d,0.2)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(self.nChannels, 4*self.channel_rate, 1, 1).normal_(0,0.0001)),bias=False)
    d = F.batch_norm(d,torch.zeros(4*self.channel_rate),torch.ones(4*self.channel_rate),training=True,momentum=0.9)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(4*self.channel_rate,self.channel_rate, 3, 3).normal_(0,0.0001)),bias=False,padding=self.dialtion,dilation=self.dilation)
    d = F.batch_norm(d,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.dropout2d(d,p=0.0)
    
    x = torch.cat([x,d],1)
    #dense_block-2
    self.nChannels = x.size(1)
    self.dilation = 1
    d = F.batch_norm(x,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.leaky_relu(d,0.2)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(self.nChannels, 4*self.channel_rate, 1, 1).normal_(0,0.0001)),bias=False)
    d = F.batch_norm(d,torch.zeros(4*self.channel_rate),torch.ones(4*self.channel_rate),training=True,momentum=0.9)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(4*self.channel_rate,self.channel_rate, 3, 3).normal_(0,0.0001)),bias=False,padding=self.dialtion,dilation=self.dilation)
    d = F.batch_norm(d,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.dropout2d(d,p=0.0)
    
    x = torch.cat([x,d],1)
    #dense_block-3
    self.nChannels = x.size(1)
    self.dilation = 2
    d = F.batch_norm(x,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.leaky_relu(d,0.2)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(self.nChannels, 4*self.channel_rate, 1, 1).normal_(0,0.0001)),bias=False)
    d = F.batch_norm(d,torch.zeros(4*self.channel_rate),torch.ones(4*self.channel_rate),training=True,momentum=0.9)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(4*self.channel_rate,self.channel_rate, 3, 3).normal_(0,0.0001)),bias=False,padding=self.dialtion,dilation=self.dilation)
    d = F.batch_norm(d,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.dropout2d(d,p=0.0)
    
    x = torch.cat([x,d],1)
    #dense_block-4
    self.nChannels = x.size(1)
    self.dilation = 1
    d = F.batch_norm(x,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.leaky_relu(d,0.2)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(self.nChannels, 4*self.channel_rate, 1, 1).normal_(0,0.0001)),bias=False)
    d = F.batch_norm(d,torch.zeros(4*self.channel_rate),torch.ones(4*self.channel_rate),training=True,momentum=0.9)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(4*self.channel_rate,self.channel_rate, 3, 3).normal_(0,0.0001)),bias=False,padding=self.dialtion,dilation=self.dilation)
    d = F.batch_norm(d,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.dropout2d(d,p=0.0)
    
    x = torch.cat([x,d],1)
    #dense_block-5
    self.nChannels = x.size(1)
    self.dilation = 3
    d = F.batch_norm(x,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.leaky_relu(d,0.2)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(self.nChannels, 4*self.channel_rate, 1, 1).normal_(0,0.0001)),bias=False)
    d = F.batch_norm(d,torch.zeros(4*self.channel_rate),torch.ones(4*self.channel_rate),training=True,momentum=0.9)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(4*self.channel_rate,self.channel_rate, 3, 3).normal_(0,0.0001)),bias=False,padding=self.dialtion,dilation=self.dilation)
    d = F.batch_norm(d,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.dropout2d(d,p=0.0)
    
    x = torch.cat([x,d],1)
    #dense_block-6
    self.nChannels = x.size(1)
    self.dilation = 1
    d = F.batch_norm(x,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.leaky_relu(d,0.2)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(self.nChannels, 4*self.channel_rate, 1, 1).normal_(0,0.0001)),bias=False)
    d = F.batch_norm(d,torch.zeros(4*self.channel_rate),torch.ones(4*self.channel_rate),training=True,momentum=0.9)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(4*self.channel_rate,self.channel_rate, 3, 3).normal_(0,0.0001)),bias=False,padding=self.dialtion,dilation=self.dilation)
    d = F.batch_norm(d,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.dropout2d(d,p=0.0)
    
    x = torch.cat([x,d],1)
    #dense_block-7
    self.nChannels = x.size(1)
    self.dilation = 2
    d = F.batch_norm(x,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.leaky_relu(d,0.2)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(self.nChannels, 4*self.channel_rate, 1, 1).normal_(0,0.0001)),bias=False)
    d = F.batch_norm(d,torch.zeros(4*self.channel_rate),torch.ones(4*self.channel_rate),training=True,momentum=0.9)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(4*self.channel_rate,self.channel_rate, 3, 3).normal_(0,0.0001)),bias=False,padding=self.dialtion,dilation=self.dilation)
    d = F.batch_norm(d,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.dropout2d(d,p=0.0)
    
    x = torch.cat([x,d],1)
    #dense_block-8
    self.nChannels = x.size(1)
    self.dilation = 1
    d = F.batch_norm(x,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.leaky_relu(d,0.2)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(self.nChannels, 4*self.channel_rate, 1, 1).normal_(0,0.0001)),bias=False)
    d = F.batch_norm(d,torch.zeros(4*self.channel_rate),torch.ones(4*self.channel_rate),training=True,momentum=0.9)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(4*self.channel_rate,self.channel_rate, 3, 3).normal_(0,0.0001)),bias=False,padding=self.dialtion,dilation=self.dilation)
    d = F.batch_norm(d,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.dropout2d(d,p=0.0)
    
    x = torch.cat([x,d],1)

    #dense_block-9
    self.nChannels = x.size(1)
    self.dilation = 1
    d = F.batch_norm(x,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.leaky_relu(d,0.2)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(self.nChannels, 4*self.channel_rate, 1, 1).normal_(0,0.0001)),bias=False)
    d = F.batch_norm(d,torch.zeros(4*self.channel_rate),torch.ones(4*self.channel_rate),training=True,momentum=0.9)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(4*self.channel_rate,self.channel_rate, 3, 3).normal_(0,0.0001)),bias=False,padding=self.dialtion,dilation=self.dilation)
    d = F.batch_norm(d,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.dropout2d(d,p=0.0)
    
    x = torch.cat([x,d],1)

    #dense_block-10
    self.nChannels = x.size(1)
    self.dilation = 1
    d = F.batch_norm(x,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.leaky_relu(d,0.2)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(self.nChannels, 4*self.channel_rate, 1, 1).normal_(0,0.0001)),bias=False)
    d = F.batch_norm(d,torch.zeros(4*self.channel_rate),torch.ones(4*self.channel_rate),training=True,momentum=0.9)
    d = F.conv2d(d, nn.Parameter(torch.Tensor(4*self.channel_rate,self.channel_rate, 3, 3).normal_(0,0.0001)),bias=False,padding=self.dialtion,dilation=self.dilation)
    d = F.batch_norm(d,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    d = F.dropout2d(d,p=0.0)
    
    #tail
    x = F.batch_norm(d,torch.zeros(self.nChannels),torch.ones(self.nChannels),training=True,momentum=0.9)
    x = F.conv2d(x, nn.Parameter(torch.Tensor(self.nChannels, 4*self.channel_rate, 1, 1).normal_(0,0.0001)),bias=False)
    x = F.batch_norm(x,torch.zeros(4*self.channel_rate),torch.ones(4*self.channel_rate),training=True,momentum=0.9)
    x = F.dropout2d(x,p=0.0)

    x = torch.cat([x,x1],1)
    
    #last_layer
    x = F.conv2d(x,nn.Parameter(torch.Tensor(self.nChannels, self.channel_rate,3,3).normal_(0,0.0001)),bias=False)
    x = F.prelu(x,nn.Parameter(torch.Tensor(self.channel_rate)))
    x = F.conv2d(x,nn.Parameter(torch.Tensor(self.channel_rate,3,3,3).normal_(0,0.0001)),bias=False)
    x = F.tanh(x)

    return x

Let me know what the problem is. Also, how should I pass the parameter list to the optimizer? I am getting this error:
ValueError: optimizer got an empty parameter list
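For what it's worth, a minimal illustration (with a made-up module) of why this error typically appears with the purely functional approach: nn.Parameters created inside forward and bound only to local variables are not registered, so model.parameters() stays empty:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Lazy(nn.Module):
    def forward(self, x):
        # created on every call and only bound to a local name -> not registered
        w = nn.Parameter(torch.randn(4, x.size(1), 3, 3))
        return F.conv2d(x, w, padding=1)

model = Lazy()
print(list(model.parameters()))  # [] -> the optimizer would get an empty parameter list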

You can try to change the version of PyTorch.

I have successfully developed the model using the functional API approach. Thanks for the help, @ptrblck.