The function I use is as follows:
```python
def batch_prototype(feature, mask):  # returns B x C x feature_size
    # masked_average_pooling is defined elsewhere; see the sketch below
    batch_pro = torch.zeros(mask.shape[0], mask.shape[1], feature.shape[1]).to('cuda')
    for i in range(mask.shape[1]):  # one prototype per class channel
        classmask = mask[:, i, :, :]
        proclass = masked_average_pooling(feature, classmask.unsqueeze(1))
        batch_pro[:, i, :] = proclass
    return batch_pro
```
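For reference, a minimal sketch of what `masked_average_pooling` does, assuming the usual soft-mask formulation (this is an illustration; the exact interpolation mode and epsilon in my helper may differ):

```python
import torch
import torch.nn.functional as F

def masked_average_pooling(feature, mask):
    # feature: B x C_feat x H x W; mask: B x 1 x h x w (soft class mask)
    # resize the mask to the feature's spatial size, then compute the
    # mask-weighted spatial average of the feature vectors
    mask = F.interpolate(mask, size=feature.shape[-2:],
                         mode='bilinear', align_corners=True)
    pooled = torch.sum(feature * mask, dim=(2, 3)) \
        / (mask.sum(dim=(2, 3)) + 1e-5)  # B x C_feat
    return pooled
```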
This function is called inside the decoder:
```python
class Decoder_idea(nn.Module):
    def __init__(self, params):
        super(Decoder_idea, self).__init__()
        self.params = params
        self.in_chns = self.params['in_chns']
        self.ft_chns = self.params['feature_chns']
        self.n_class = self.params['class_num']
        self.bilinear = self.params['bilinear']
        assert len(self.ft_chns) == 5
        # UpBlock is defined elsewhere in my code
        self.up1 = UpBlock(
            self.ft_chns[4], self.ft_chns[3], self.ft_chns[3], dropout_p=0.0)
        self.up2 = UpBlock(
            self.ft_chns[3], self.ft_chns[2], self.ft_chns[2], dropout_p=0.0)
        self.up3 = UpBlock(
            self.ft_chns[2], self.ft_chns[1], self.ft_chns[1], dropout_p=0.0)
        self.up4 = UpBlock(
            self.ft_chns[1], self.ft_chns[0], self.ft_chns[0], dropout_p=0.0)
        self.out_conv = nn.Conv2d(self.ft_chns[0], self.n_class,
                                  kernel_size=3, padding=1)

    def forward(self, feature):
        x0 = feature[0]
        x1 = feature[1]
        x2 = feature[2]
        x3 = feature[3]
        x4 = feature[4]
        x = self.up1(x4, x3)
        up1_out = x
        x = self.up2(x, x2)
        up2_out = x
        x = self.up3(x, x1)
        up3_out = x
        x = self.up4(x, x0)
        up4_out = x
        output = self.out_conv(x)
        mask = torch.softmax(output, dim=1)
        batch_pro = batch_prototype(x, mask)  # <-- the call whose memory is never freed
        torch.cuda.empty_cache()
        intermediate_out = {
            'up1': up1_out,
            'up2': up2_out,
            'up3': up3_out,
            'up4': up4_out,
        }
        return output, intermediate_out, batch_pro
```
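For context, here is a minimal (hypothetical) way to drive this decoder, with made-up channel sizes chosen only to show the shapes involved; it assumes the usual UNet-style `UpBlock` that upsamples its first input and concatenates the second:

```python
import torch

# hypothetical configuration, not my real one
params = {'in_chns': 1, 'feature_chns': [16, 32, 64, 128, 256],
          'class_num': 4, 'bilinear': True}
decoder = Decoder_idea(params).to('cuda')

# fake encoder outputs at five scales for a batch of 2 and a 256x256 input
feats = [torch.randn(2, c, 256 // 2 ** i, 256 // 2 ** i, device='cuda')
         for i, c in enumerate(params['feature_chns'])]

output, intermediate, batch_pro = decoder(feats)
print(output.shape)     # torch.Size([2, 4, 256, 256])
print(batch_pro.shape)  # torch.Size([2, 4, 16]), i.e. B x class_num x ft_chns[0]
```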
Now my problem is that every time `batch_prototype` runs to compute `batch_pro`, the CUDA memory it occupies is never released, so the allocated memory grows over time until CUDA can no longer satisfy allocations and I get an out-of-memory error. How can I solve this?
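For what it's worth, this is roughly how I observe the growth (a hypothetical monitoring loop; `model`, `criterion`, `optimizer`, and `loader` are placeholders for my actual training setup):

```python
import torch

for step, (images, labels) in enumerate(loader):
    output, intermediate, batch_pro = model(images.to('cuda'))
    loss = criterion(output, labels.to('cuda'))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # the allocated figure climbs step after step, even though
    # empty_cache() is called inside forward()
    print(step, torch.cuda.memory_allocated() // 2**20, 'MiB allocated')
```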