My model performs this convolution operation on an image through 6 CNN pyramid levels with different resolutions. Unfortunately, the `self.attenflow` layer throws a CUDA out-of-memory error, although the tensors have only 2 channels. Could anyone explain how to solve this problem?

```
class FlowEstimator(nn.Module):
    """Predict a flow map from input features and refine it with cross-attention.

    Five ``conv`` stages are chained; from the third stage on, each stage
    consumes the channel-wise concatenation of the two preceding stage
    outputs. A final 3x3 ``CustomConv2D`` maps the last two stage outputs to
    ``ch_out`` channels, and ``Cross_Attn`` combines that prediction with the
    first element of ``context``.

    Args:
        ch_in: Second argument handed to the first ``conv`` stage
            (presumably its input channel count; confirm against ``conv``).
        ch_out: Second argument handed to ``predict_flow``; defaults to 2.
        bn: Forwarded as the first argument of every ``conv`` stage
            (presumably toggles batch normalisation; confirm against ``conv``).
    """

    def __init__(self, ch_in, ch_out=2, bn=False):
        super().__init__()

        # Stage widths: 128 -> 128 -> 96 -> 64 -> 32. The summed second
        # arguments of conv3..conv5 mirror the concatenations in forward().
        self.conv1 = conv(bn, ch_in, 128)
        self.conv2 = conv(bn, 128, 128)
        self.conv3 = conv(bn, 128 + 128, 96)
        self.conv4 = conv(bn, 128 + 96, 64)
        self.conv5 = conv(bn, 96 + 64, 32)

        # NOTE(review): if Cross_Attn materialises a full
        # (H*W) x (H*W) attention map, its memory grows quadratically with
        # spatial resolution regardless of the 2-channel input. Confirm
        # against the Cross_Attn implementation.
        self.attenflow = Cross_Attn(2, 'relu')

        # Same value as the last argument of conv5 above.
        self.final_out = 32

        self.predict_flow = CustomConv2D(
            64 + 32,
            ch_out,
            kernel_size=3,
            stride=1,
            padding=1,
        )

    def forward(self, x, context):
        """Run the conv stack, predict flow, and refine it with attention.

        Args:
            x: Input passed to the first conv stage.
            context: Indexable object; only ``context[0]`` is used.

        Returns:
            A tuple ``(feat5, final_flow)``: the output of the last conv
            stage and the output of ``self.attenflow``.
        """
        feat1 = self.conv1(x)
        feat2 = self.conv2(feat1)

        # Each later stage sees the two previous outputs stacked on dim 1.
        stacked_12 = torch.cat([feat1, feat2], dim=1)
        feat3 = self.conv3(stacked_12)

        stacked_23 = torch.cat([feat2, feat3], dim=1)
        feat4 = self.conv4(stacked_23)

        stacked_34 = torch.cat([feat3, feat4], dim=1)
        feat5 = self.conv5(stacked_34)

        ctx = context[0]

        stacked_45 = torch.cat([feat4, feat5], dim=1)
        raw_flow = self.predict_flow(stacked_45)

        final_flow = self.attenflow(raw_flow, ctx)
        return feat5, final_flow
```