I have a deep neural network with a custom cost function, which I implemented with the following strategy. First I create a batch of data, say 64 images, forward the batch through the network, and compute the cost value. To build the gradient for the network, i.e., for the variable just before my cost layer, I create a tensor with the same size as the network's output variable, then iterate over the images with a for loop and fill this newly created tensor. What I actually return from the backward function is that tensor. Here is my code:
```python
for i, (images, labels) in enumerate(trainLoader):
    if s in steps:
        learning_rate = learning_rate * 0.1
        optimizer = optim.SGD(net.parameters(), lr=learning_rate,
                              momentum=momentum, weight_decay=decay)
    if cuda:
        images = images.cuda()
    images = V(images)
    optimizer.zero_grad()
    output = net(images).cpu()  # batches*95*S*S
    loss = 0
    target, index = jsonToTensor(labels)
    target, index = V(target), V(index)
    cost = criterion(output, target, index)
    loss += cost.data[0]
    cost.backward()
    epochLoss += loss
    optimizer.step()
    print("(%d,%d) -> Current Batch Loss: %f" % (epoch, i, (loss / batches)))
    s = s + 1
```
And here is my cost function:
```python
class Criterion(Function):

    def __init__(self, S, B, numClass, coords, l_coord, l_nobj):
        super(Criterion, self).__init__()
        self.S = S  # number of cells per side
        self.B = B  # number of bounding boxes per cell
        self.numClass = numClass
        self.coords = coords
        self.l_coord = l_coord
        self.l_nobj = l_nobj

    def forward(self, pred_out, real_out, index):
        # pred_out -> 64*95*7*7
        self.save_for_backward(pred_out, real_out, index)
        cost = 0
        for i in range(pred_out.size()[0]):
            if i == 0:
                endInx = int((index[i]).numpy())
                if i == endInx:
                    continue
                cost += self.perImageCalc(pred_out[i,:,:,:].unsqueeze(0), real_out[0:endInx,:])
            else:
                startInx = int((index[i-1]).numpy())
                endInx = int((index[i]).numpy())
                if startInx == endInx:
                    continue
                cost += self.perImageCalc(pred_out[i,:,:,:].unsqueeze(0), real_out[startInx:endInx,:])
        return cost

    def perImageCalc(self, pt, rt):
        # doing some stuff to calculate the cost
        return cost

    def backward(self, grad_cost):
        pt, rt, index = self.saved_tensors
        grad_pred_out = torch.zeros(pt.size())
        for i in range(pt.size()[0]):
            if i == 0:
                endInx = int((index[i]).numpy())
                grad_pred_out[i,:,:,:] = self.gradPerImageCalc(pt[i,:,:,:].unsqueeze(0), rt[0:endInx,:])
            else:
                startInx = int((index[i-1]).numpy())
                endInx = int((index[i]).numpy())
                grad_pred_out[i,:,:,:] = self.gradPerImageCalc(pt[i,:,:,:].unsqueeze(0), rt[startInx:endInx,:])
        grad_real_out = grad_index = None
        return grad_pred_out, grad_real_out, grad_index

    def gradPerImageCalc(self, pt, rt):
        # doing some stuff
        return grad_pred_out

    def indices(self, a, func):
        return [i for (i, val) in enumerate(a) if func(val)]
```
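For clarity, here is a toy sketch of how I call the criterion. The values and constructor arguments below are made up for illustration (S=7, B=3, numClass=80 would give the 95 output channels); the point is the index convention: index[i] is the cumulative number of ground-truth rows up to and including image i, so image i owns real_out[index[i-1]:index[i]].

```python
# Toy illustration of the criterion's inputs -- values are invented, not my real data.
# index holds cumulative end offsets into real_out: image 0 owns rows 0:3, image 1 rows 3:5.
criterion = Criterion(S=7, B=3, numClass=80, coords=4, l_coord=5.0, l_nobj=0.5)
pred_out = torch.rand(2, 95, 7, 7)   # batch of 2 predictions
real_out = torch.rand(5, 5)          # 5 ground-truth rows in total (row width is a guess)
index = torch.Tensor([3, 5])         # cumulative box counts per image
cost = criterion(V(pred_out), V(real_out), V(index))
```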
The forward and backward passes work fine for the first batch, but when I call backward the second time, i.e., in the second iteration of the loop, I receive this runtime error:
```
RuntimeError: Trying to backward through the graph second time, but the buffers have already been freed. Please specify retain_variables=True when calling backward for the first time.

/home/mlcmdeep/Mohammad_Mahdi_Derakhshani/yo/MSCoco/Main_v2.py(182)train()
-> cost.backward()
(Pdb) bt
  /home/mlcmdeep/anaconda3/lib/python3.6/bdb.py(431)run()
-> exec(cmd, globals, locals)
  <string>(1)<module>()
  /home/mlcmdeep/Mohammad_Mahdi_Derakhshani/yo/MSCoco/Main_v2.py(209)<module>()
-> train(epoch)
  /home/mlcmdeep/Mohammad_Mahdi_Derakhshani/yo/MSCoco/Main_v2.py(182)train()
-> cost.backward()
  /home/mlcmdeep/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py(146)backward()
-> self._execution_engine.run_backward((self,), (gradient,), retain_variables)
  /home/mlcmdeep/Mohammad_Mahdi_Derakhshani/yo/MSCoco/Cost.py(128)backward()
-> pt, rt, index = self.saved_tensors
```
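If it helps, the message itself points at the retain_variables flag; as far as I understand, that would mean calling the first backward as in the sketch below, though I don't see why a loop with one backward per batch should need it:

```python
# What the error message suggests: keep the graph buffers alive after the first backward.
# (retain_variables is the old name; newer PyTorch versions call this flag retain_graph.)
cost.backward(retain_variables=True)
```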
Could you please help me figure out the source of this error? By the way, here is my network:
```python
class yoloV1CocoTiny(nn.Module):

    def __init__(self, inputChannel, numClass, B, S, ngpu, batchSize):
        super(yoloV1CocoTiny, self).__init__()
        self.ngpu = ngpu
        self.batchSize = batchSize
        self.main = nn.Sequential(
            nn.Conv2d(3, 16, 3, 1, 1),
            nn.BatchNorm2d(16),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 32, 3, 1, 1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(256, 512, 3, 1, 1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(512, 1024, 3, 1, 1),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(0.1),
            nn.Conv2d(1024, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.1),
            nn.Conv2d(256, numClass + B*5, 3, 1, 1),
            nn.ReLU(True)
        )

    def forward(self, input):
        gpu_ids = None
        if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
            gpu_ids = range(self.ngpu)
            output = nn.parallel.data_parallel(self.main, input, gpu_ids)
        else:
            output = self.main(input)
        return output
```
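For reference, this is roughly how I sanity-check the output shape (constructor arguments are illustrative guesses; a 448x448 input goes through six 2x2 max-pools, giving the 7x7 grid, and numClass + B*5 = 95 channels):

```python
# Quick shape check -- constructor values are illustrative guesses, not my exact config.
net = yoloV1CocoTiny(inputChannel=3, numClass=80, B=3, S=7, ngpu=1, batchSize=64)
dummy = V(torch.randn(64, 3, 448, 448))
out = net(dummy)
print(out.size())  # expected: 64 x 95 x 7 x 7, matching the comment in the training loop
```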