import torch
import torch.nn as nn

class Self_Attn(nn.Module):
    """Self-attention layer"""
    def __init__(self, in_dim):
        super().__init__()
        # 1x1 convolutions that project the input into query, key, and value spaces
        self.query_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1)
        self.key_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1)
        self.value_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)
        # Learnable residual weight, initialized to 0 so the block starts as an identity
        self.gamma = nn.Parameter(torch.zeros(1))
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """
        inputs:
            x : input feature maps (B x C x W x H)
        returns:
            out : self-attention value + input feature
            attention : B x N x N (N is Width * Height)
        """
        m_batchsize, C, width, height = x.size()
        proj_query = self.query_conv(x).view(m_batchsize, -1, width * height).permute(0, 2, 1)  # B x N x C'
        proj_key = self.key_conv(x).view(m_batchsize, -1, width * height)                       # B x C' x N
        energy = torch.bmm(proj_query, proj_key)  # batch matrix-matrix product, B x N x N
        attention = self.softmax(energy)          # softmax over the last dim, B x N x N
        proj_value = self.value_conv(x).view(m_batchsize, -1, width * height)                   # B x C x N
        out = torch.bmm(proj_value, attention.permute(0, 2, 1))  # B x C x N
        out = out.view(m_batchsize, C, width, height)  # back to B x C x W x H
        # Residual connection: scale the attention output by gamma and add the input
        out = self.gamma * out + x
        return out, attention
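For reference, here is a quick shape check of the layer above (the sizes are arbitrary, chosen just for illustration):

layer = Self_Attn(in_dim=64)
x = torch.randn(4, 64, 16, 16)   # B x C x W x H
out, attention = layer(x)
print(out.shape)        # torch.Size([4, 64, 16, 16])
print(attention.shape)  # torch.Size([4, 256, 256]), N = 16 * 16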
The class above defines a self-attention block. The line

    m_batchsize, C, width, height = x.size()

inside the forward call just extracts the size information of the input batch. We only need to compute this once, at the beginning of training; afterwards we can reuse the same values. Is there any way to specify that a particular code block inside the forward call should run only on the first call?
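One common pattern for this (a minimal sketch, not specific to this model; the class name LazySizeModule and the _size attribute are made up for illustration) is to memoize the value on the first call: initialize an attribute to None in __init__ and fill it the first time forward runs.

import torch
import torch.nn as nn

class LazySizeModule(nn.Module):
    """Sketch: cache the input size on the first forward call.
    Assumes every later batch has exactly the same shape."""
    def __init__(self):
        super().__init__()
        self._size = None  # plain Python attribute, filled in lazily

    def forward(self, x):
        if self._size is None:
            # This branch runs only once, on the first call
            self._size = x.size()
        m_batchsize, C, width, height = self._size
        return x.view(m_batchsize, C, width * height)

Two caveats: x.size() only reads shape metadata and touches no tensor data, so it is essentially free, and caching it only pays off if the code that depends on it is expensive. The pattern also assumes every batch has the same shape, which fails for the (often smaller) last batch of an epoch unless the DataLoader is created with drop_last=True.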