The main code is from here:
I defined an SE attention block as below:
import torch
import torch.nn.functional as F

class SEBlock(torch.nn.Module):
    def __init__(self, in_channels):
        super(SEBlock, self).__init__()
        self.avg_pool = torch.nn.AdaptiveAvgPool2d(1)
        self.fc1 = torch.nn.Conv2d(in_channels, in_channels, 1, bias=False)
        self.fc2 = torch.nn.Conv2d(in_channels, in_channels, 1, bias=False)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        y = self.avg_pool(x)         # squeeze: global average pool to 1x1 per channel
        y = F.relu(self.fc1(y))
        y = self.fc2(y)
        y = self.sigmoid(y)          # excitation: per-channel gates in (0, 1)
        return x * y                 # rescale the input channels
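As a quick sanity check (a minimal sketch, assuming the imports above and an arbitrary float32 input), the block just rescales each channel and preserves the input shape:

se = SEBlock(in_channels=8)
x = torch.randn(2, 8, 16, 16)   # [N, C, H, W], float32
y = se(x)
print(y.shape, y.dtype)         # torch.Size([2, 8, 16, 16]) torch.float32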
And a simplified version of the section where the error originates:
class SynthesisLayer(torch.nn.Module):
    def __init__(self,
        ....
        attention = True,  # Whether to use the attention mechanism
    ):
        super().__init__()
        # Self attention
        self.attention = attention
        if self.attention:
            # Initialize SEBlock here
            self.se_block = SEBlock(self.out_channels)  # SEBlock creation

    def forward(self, x, w, noise_mode='random', force_fp32=False, update_emas=False):
        ...
        # Execute modulated conv2d.
        dtype = torch.float16 if (self.use_fp16 and not force_fp32 and x.device.type == 'cuda') else torch.float32
        x = modulated_conv2d(x=x.to(dtype), w=self.weight, s=styles,
                             padding=self.conv_kernel-1, demodulate=(not self.is_torgb), input_gain=input_gain)
        if self.attention:
            # Apply SEBlock for attention
            x = x.to(dtype)
            self.se_block = self.se_block.to(dtype)
            x = self.se_block(x)
        # ...
        return x
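For reference, here is a minimal sketch (using a plain Conv2d as a stand-in for the SE block, not the actual layer) of what the self.se_block.to(dtype) call in forward does when dtype is torch.float16: nn.Module.to() converts the module's parameters in place, so the SE block's weights become half precision.

import torch

block = torch.nn.Conv2d(4, 4, 1, bias=False)   # stand-in for se_block
print(block.weight.dtype)                       # torch.float32
block = block.to(torch.float16)                 # converts the parameters in place
print(block.weight.dtype)                       # torch.float16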
So, it runs OK without the SE block, but when it is enabled I’m getting:
line 319, in training_loop
param.grad = grad.reshape(param.shape)
^^^^^^^^^^
RuntimeError: attempting to assign a gradient with dtype 'float' to a tensor with dtype 'struct c10::Half'. Please ensure that the gradient and the tensor have the same dtype
I can’t explain this, since I use the same dtype as x and also cast the SE block to it. Any suggestions?
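For what it’s worth, the error message itself can be reproduced in isolation (a hypothetical minimal repro, mirroring the assignment shown in the traceback rather than the actual training_loop code): assigning a float32 gradient to a float16 parameter’s .grad raises the same kind of mismatch.

import torch

param = torch.nn.Parameter(torch.zeros(4, dtype=torch.float16))   # half-precision parameter
grad = torch.zeros(4, dtype=torch.float32)                        # float32 gradient
param.grad = grad.reshape(param.shape)                            # raises a RuntimeError about mismatched dtypes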