I am trying to cluster some images using the code from (link omitted),
in which the clustering layer and its weights are defined as follows:
class ClusterlingLayer(nn.Module):
    """Soft cluster-assignment layer.

    Holds one learnable centroid per cluster in ``weight`` (shape
    ``(out_features, in_features)``) and maps each input row to a
    distribution over clusters using a Student's t kernel on the squared
    Euclidean distance to every centroid.

    Args:
        in_features: Dimensionality of each input row / centroid.
        out_features: Number of clusters (centroids).
        alpha: Degrees-of-freedom parameter of the kernel.
    """

    def __init__(self, in_features=10, out_features=10, alpha=1.0):
        super(ClusterlingLayer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        # Allocate uninitialised storage, then fill it in place.
        self.weight = nn.Parameter(
            torch.empty(self.out_features, self.in_features),
            requires_grad=True,
        )
        nn.init.xavier_uniform_(self.weight)

    def forward(self, x):
        """Return the soft assignment of each row of ``x`` to each centroid.

        ``x`` must be 2-D with ``in_features`` columns; the result has one
        row per input row and ``out_features`` columns, each row summing
        to 1.
        """
        # (batch, 1, in) - (out, in) broadcasts to (batch, out, in).
        diff = x.unsqueeze(1) - self.weight
        sq_dist = torch.sum(diff * diff, dim=2)
        # Student's t kernel: (1 + d / alpha) ** (-(alpha + 1) / 2).
        kernel = 1.0 / (1.0 + sq_dist / self.alpha)
        kernel = kernel ** ((self.alpha + 1.0) / 2.0)
        # Normalise over clusters so that every row is a distribution.
        return kernel / torch.sum(kernel, dim=1, keepdim=True)

    def extra_repr(self):
        """Return the layer hyper-parameters for ``repr(module)``."""
        return 'in_features={}, out_features={}, alpha={}'.format(
            self.in_features, self.out_features, self.alpha
        )

    def set_weight(self, tensor):
        """Overwrite the centroids with the values in ``tensor``.

        When ``tensor`` has the same shape as the current weight, the
        values are copied *in place*, so the ``nn.Parameter`` object keeps
        its identity. This matters because an optimizer stores references
        to the parameter objects it was constructed with: rebinding
        ``self.weight`` to a new ``nn.Parameter`` after the optimizer was
        created leaves the optimizer updating the old, orphaned tensor
        while the new one receives gradients but is never stepped.

        If the shape differs, the parameter has to be replaced; in that
        case any optimizer created earlier must be rebuilt by the caller.
        """
        if tensor.shape == self.weight.shape:
            with torch.no_grad():
                # copy_ also converts to the parameter's dtype and device.
                self.weight.copy_(tensor)
        else:
            self.weight = nn.Parameter(tensor, requires_grad=True)

    def return_weight(self):
        """Return the centroid parameter itself (not a copy)."""
        return self.weight
I also added the following code to the training loop to check the data and gradient of the clustering weights:
# Debug aid: dump the current values and gradients of every parameter whose
# registered name contains 'clustering'.
for name, param in model.named_parameters():
    if 'clustering' not in name:
        continue
    print('clustering weight data and grad:')
    print(param.data)
    print(param.grad)
On my custom dataset, the data and grad are as follows:
epoch 343
clustering weight data and grad:
tensor([[ 38.1564, 50.6772, 15.0029, 27.4586],
[ -1.5073, 44.5947, -11.2756, -53.1255],
[ 4.1370, 42.0674, -44.5653, -15.3601],
[ -6.0383, 48.9728, -25.7221, 30.2076]], device='cuda:0')
tensor([[-0.0278, -0.0074, -0.0885, 0.1166],
[-0.0663, -0.0325, -0.1827, -0.0491],
[ 0.0852, 0.0231, 0.1948, -0.1382],
[ 0.0097, 0.0350, 0.0888, 0.0871]], device='cuda:0')
epoch 344
clustering weight data and grad:
tensor([[ 38.1564, 50.6772, 15.0029, 27.4586],
[ -1.5073, 44.5947, -11.2756, -53.1255],
[ 4.1370, 42.0674, -44.5653, -15.3601],
[ -6.0383, 48.9728, -25.7221, 30.2076]], device='cuda:0')
tensor([[-0.0279, -0.0073, -0.0888, 0.1171],
[-0.0667, -0.0326, -0.1836, -0.0493],
[ 0.0854, 0.0234, 0.1954, -0.1385],
[ 0.0097, 0.0351, 0.0894, 0.0875]], device='cuda:0')
The weights of the clustering layer are not updated at all during training, even though the gradient exists and is not small.
Meanwhile, I also checked the weights of the other layers; they all get updated normally during training.
Does anyone have any ideas about what is going on?