RuntimeError: CUDA out of memory. Tried to allocate 24.00 GiB (GPU 0; 14.73 GiB total capacity; 2.51 GiB already allocated; 11.42 GiB free; 43.89 MiB cached)

The following code on execution gives me the above error:

class Repcnn(torch.nn.Module):
  def __init__(self,wfp):
    super(Repcnn,self).__init__()
    self.a,self.b=self.initialize(wfp)
    
  def initialize(self,wfp):
    wtilde=wfp/torch.std(wfp)
    sigma_a=0.95-((0.95-0.05)*torch.abs(wtilde))
    sigma_b=0.5*(1+(wfp/(1-sigma_a)))
    sigma_a=torch.clamp(sigma_a,0.05,0.95)
    sigma_b=torch.clamp(sigma_b,0.05,0.95)
    a=torch.log(sigma_a/(1-sigma_a)).requires_grad_().cuda()
    b=torch.log(sigma_b/(1-sigma_b)).requires_grad_().cuda()
    return torch.nn.Parameter(a),torch.nn.Parameter(b)
  
  def forward(self,x):
    
    weight_m= (2*sigm(self.b)-(2*sigm(self.a)*sigm(self.b))-1+sigm(self.a))
    weight_v=(1-sigm(self.a))-weight_m**2
    assert torch.all(weight_v>=0)
    om=F.conv2d(x,weight_m,padding=1)
    ov=F.conv2d(x**2,weight_v,padding=1)
    assert torch.all(ov>=0)
    e=torch.randn_like(ov).cuda()
    z=om+(ov*e)
    return z
  

class Repfc(torch.nn.Module):
  def __init__(self,wfp):
    super(Repfc,self).__init__()
    self.a1,self.b1=self.initialize(wfp)
    
  def initialize(self,wfp):
    
    wtilde=wfp/torch.std(wfp)
    sigma_a=0.95-((0.95-0.05)*torch.abs(wtilde))
    sigma_b=0.5*(1+(wfp/(1-sigma_a)))
    sigma_a=torch.clamp(sigma_a,0.05,0.95)
    sigma_b=torch.clamp(sigma_b,0.05,0.95)
    a=torch.log(sigma_a/(1-sigma_a)).requires_grad_()
    b=torch.log(sigma_b/(1-sigma_b)).requires_grad_()
    return torch.nn.Parameter(a),torch.nn.Parameter(b) 
  
  
  def forward(self,x):
    
    weight_m=(2*sigm(self.b1)-(2*sigm(self.a1)*sigm(self.b1))-1+sigm(self.a1))
    weight_v=(1-sigm(self.a1))-weight_m**2
    om=torch.matmul(weight_m,x)
    ov=torch.matmul(weight_v,x**2)
    e=torch.randn(ov.shape).cuda()
    z=om+(ov*e)
    
    return z
 

model=torch.load('/content/cifar_fullprecison_vgg19_valid_shayer_change.pth',map_location='cpu')
wfp=[]
wfp.append(model['layer1.0.weight'])
wfp.append(model['layer1.3.weight'])
wfp.append(model['layer2.0.weight'])
wfp.append(model['layer2.3.weight'])
wfp.append(model['layer3.0.weight'])
wfp.append(model['layer3.3.weight'])
wfp.append(model['layer4.0.weight'])
wfp.append(model['layer4.3.weight'])

for i in range(len(wfp)):
  wfp[i]=torch.Tensor(wfp[i])
  
  
class Conv_Net(torch.nn.Module):
  def __init__(self,wfp):
    super(Conv_Net,self).__init__()
    self.hidden=torch.nn.ModuleList([])
    self.batchnorm=torch.nn.ModuleList([])
    for i in range(6):
      cnn=Repcnn(wfp[i])
      self.hidden.append(cnn)
    for j in range(2):
      fc=Repfc(wfp[i])
      i+=1
      self.hidden.append(fc)
    batch_dim=[128,256,512]
    for i in batch_dim:
      self.batchnorm.append(torch.nn.BatchNorm2d(i))
    self.mp=torch.nn.MaxPool2d(kernel_size=2,stride=2)
  def forward(self,x):
    op=x
    j=0
    while(j<6):
      obj=self.hidden[j]
      obj_next=self.hidden[j+1]
      b=self.batchnorm[j//2]
      j+=2
      op=self.mp(b(F.relu(obj_next(b(F.relu(obj(op)))))))
    op=op.view(op.size(0),-1)
    obj=self.hidden[j]
    op=F.dropout(F.relu(obj(op)))
    j+=1
    obj=self.hidden[j]
    yout=obj(op)
    
    return yout
  


net=Conv_Net(wfp).to(device)

def l2_reg():
  
  sum=0
  for p in net.parameters():
    sum+=p.norm(2)
  return sum
l_rate=0.01
beta_param=1e-11
weight_decay=1e-4
optimizer=torch.optim.Adam(net.parameters(),lr=l_rate,weight_decay=weight_decay)
criterion=torch.nn.CrossEntropyLoss()
net.train()
num_epochs=290
for epoch in range(num_epochs):
  for i,(images,labels) in enumerate(train_loader):
    images=images.to(device)
    labels=labels.to(device)
    optimizer.zero_grad()
    yout=net(images)
    loss_batch=criterion(yout,labels)+(beta_param*l2_reg())
    loss_batch.backward()
    optimizer.step()
    torch.cuda.empty_cache()
  print('epoch {}'.format(epoch),'loss {}'.format(loss_batch.item()))
    ```

I find it really hard to debug this error. Have I defined my model parameters self.a,self.b self.a1,self.b1 correctly?