The GPU memory is not enough when computing feature centers

I want use the class feature centers in the loss function. But when I compute the centers,
the GPU memory is not enough. How can I solve it? The code is as follows. Thank you very much!

for epoch in range((args.start_epoch+1), args.epochs):
  Center= computer_Center(model,dataloader, classnum)
	for input, target in train_loader:
        target = target.cuda()
        input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)
        outputs, feature = model(input_var)
        l = criterion(feature,target_var, Center) .forward()
        l.backward(retain_graph=True)

def computer_Center(model,dataloader, classnum):
    model.train()
    for i in range(classnum):
            j=0
            for input,target in dataloader:
                target=target.cuda()
                input = input.cuda()
                input_var = torch.autograd.Variable(input)
                target=torch.autograd.Variable(target)
                _, feature_ext = model(input_var)
                ind=torch.where(target==i)[0]
                if ind.shape[0]>0:
                    if j==0:
                        feature_mid = feature_ext[ind, :]
                        feature_sum_mid=feature_mid.sum(0)
                    else:
                        feature_mid = feature_ext[ind, :]
                        feature_sum_mid = feature_sum_mid+feature_mid.sum(0)
                    j=j+1

            feature_sum_mid=feature_sum_mid.unsqueeze(0)
            if i==0:
                feature_sum=feature_sum_mid
            else:
                feature_sum=torch.cat([feature_sum,feature_sum_mid],dim=0)

    Center=feature_sum
    for i in range(7):
            Center[i,:]=feature_sum[i,:]/ClaSamNum[i]

return Center