Hello, I am new to machine learning and am trying to cluster data using a CNN. But when I run the model on the MNIST data set, the output of the model collapses to zero. I am feeling frustrated, as I do not know where the problem is.
The following is the model:
# Define the CNN architecture
class Convolutional_layer(nn.Module):
    """Six-stage convolutional encoder plus a square linear layer.

    Stages 1, 3 and 5 are conv -> batch-norm -> ReLU. Stages 2, 4 and 6 are
    conv -> ReLU -> 2x2 max-pool, so the spatial size is halved three times.
    The linear layer is 1152 x 1152, which equals the flattened encoder
    output for a 1 x 28 x 28 input (128 channels x 3 x 3).
    """

    def __init__(self):
        super().__init__()
        self.con1 = self.con_module1(1, 32)
        self.con2 = self.con_module2(32, 64)
        self.con3 = self.con_module1(64, 128)
        self.con4 = self.con_module2(128, 128)
        self.con5 = self.con_module1(128, 256)
        self.con6 = self.con_module2(256, 128)
        self.linear = self.lin_module(1152, 1152)

    def forward(self, x):
        """Encode ``x`` and return the features with the linear weight.

        Args:
            x: Input batch; the first conv stage expects 1 input channel.

        Returns:
            A tuple ``(out1, out2, weight)``: the feature map produced by the
            last conv stage, the same features flattened to one row per
            sample, and the weight matrix of the linear layer.
        """
        # Perform the conv layers in order.
        out = x
        for stage in (self.con1, self.con2, self.con3,
                      self.con4, self.con5):
            out = stage(out)
        out1 = self.con6(out)
        out2 = out1.view(out1.size(0), -1)

        # Read the weight from this instance rather than from a module-level
        # ``model`` name, so the layer works no matter which variable (if
        # any) the instance is bound to.
        # The forward output of the linear layer was never returned or used,
        # so that call is dropped; only its weight is exposed.
        weight = self.linear[0].weight
        print('output is : ', out2)
        print('weight shape : ', weight.shape)
        return out1, out2, weight

    def con_module1(self, in_num, out_num):
        """Build a 3x3 conv (padding 1) -> batch-norm -> ReLU stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_num, out_channels=out_num,
                      kernel_size=3, padding=1),
            nn.BatchNorm2d(out_num),
            nn.ReLU(inplace=True),
        )

    def con_module2(self, in_num, out_num):
        """Build a 3x3 conv (padding 1) -> ReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_num, out_channels=out_num,
                      kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def lin_module(self, in_num, out_num):
        """Build a single linear layer wrapped in ``nn.Sequential``."""
        return nn.Sequential(
            nn.Linear(in_num, out_num),
        )
# Network instance that is trained below.
model = Convolutional_layer()

# Loss hyper-parameters read by DSC_CNN: `gamma` scales the L1 term on the
# weight matrix; `lamda` scales the row-norm term and is divided by the
# number of training samples.
gamma = 0.001
lamda = 0.001 / len(train_data)
def DSC_CNN(output, weight, *, l1_weight=None, norm_weight=None):
    """Self-expressive clustering loss for features and a coefficient matrix.

    The loss is the sum of four terms:

    * ``1/2 * ||H^T - H^T C||_F^2``: reconstruction of the features from
      themselves through ``C``;
    * ``l1_weight * sum(|C|)``: L1 penalty on ``C``;
    * a constant ``1e10`` if any diagonal entry of ``C`` is non-zero,
      otherwise ``0``;
    * ``norm_weight / 4 * sum_i (h_i . h_i - 1)^2``: pushes every row of
      ``H`` towards unit norm.

    Args:
        output: 2-D feature matrix ``H`` with one row per sample.
        weight: Coefficient matrix ``C``. ``output.mT @ weight`` must have
            the same shape as ``output.mT``, i.e. ``weight`` is expected to
            be square with side ``output.shape[0]``.
        l1_weight: Scale of the L1 term; defaults to the module-level
            ``gamma``.
        norm_weight: Scale of the row-norm term; defaults to the
            module-level ``lamda``.

    Returns:
        The loss as a 0-dim tensor.
    """
    # Fall back to the module-level hyper-parameters when not overridden.
    if l1_weight is None:
        l1_weight = gamma
    if norm_weight is None:
        norm_weight = lamda

    features_t = output.mT
    residual = features_t - features_t @ weight
    # Squared Frobenius norm is the plain sum of squares.
    reconstruction = 0.5 * residual.pow(2).sum()

    sparsity = l1_weight * weight.abs().sum()

    # NOTE(review): this penalty is a constant, so it contributes nothing to
    # the gradient; it only inflates the reported loss value.
    has_nonzero_diag = bool(torch.any(torch.diagonal(weight) != 0))
    diag_penalty = output.new_tensor(1e10 if has_nonzero_diag else 0.0)

    # The diagonal of H H^T is the squared norm of each row, so the full
    # Gram matrix (and an identity matrix on the right device) is not needed.
    row_sq_norms = output.pow(2).sum(dim=1)
    unit_norm = norm_weight / 4 * (row_sq_norms - 1).pow(2).sum()

    return reconstruction + sparsity + diag_penalty + unit_norm
# Plain SGD over every parameter of the network.
optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
)
The loss function is in fact:
$$L = \frac{1}{2}\lVert H - HC\rVert_F^2 + \gamma\lVert C\rVert_1 + d + \frac{\lambda}{4}\sum_i \lVert h_i^\top h_i - 1\rVert_2^2$$ where $d$ is the term called `f` in the code above.
I am not sure where the problem is.