Hi there.
I’m using PyTorch for making a CNN model to classify images (labels are 0 & 1).
My problem is when I’m using torch.flatten() in forward function, model parameters become zero.
For example, here is my model structure and training pipeline:
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.output_layer = nn.Linear(in_features=3*224*224, out_features=1, bias=True).to(device).requires_grad_(True)
def forward(self, X):
X = torch.flatten(X, start_dim=1, end_dim=-1)
output = self.output_layer(X)
output = torch.sigmoid(output)
return output
dataset = torch.rand(size=(1000, 3, 224, 224))
labels = torch.randint(low=0, high=2, size=(1000,)).type(torch.float)
for split_idx in kfold_split:
idx_train, idx_test = split_idx
X_train = dataset[idx_train].to(device)
y_train = labels[idx_train].to(device)
X_test = dataset[idx_test].to(device)
y_test = labels[idx_test].to('cpu').numpy().squeeze()
model = Model()
optimizer = optim.Adam(params=model.parameters())
for epoch in range(epochs):
error_epoch = list()
X_idx_shuffle = list(range(X_train.shape[0]))
np.random.shuffle(X_idx_shuffle)
X_train = X_train[X_idx_shuffle]
y_train = y_train[X_idx_shuffle]
for batch in range(batch_size, X_train.shape[0] + batch_size, batch_size):
X_batch = X_train[batch - batch_size : batch]
y_batch = y_train[batch - batch_size : batch]
output = model(X_batch).squeeze()
error = bce(output, y_batch)
optimizer.zero_grad()
error.backward()
optimizer.step()
print(torch.mean(model.output_layer.weight.grad))
print(torch.mean(model.output_layer.bias.grad))
print('----------------')
And results are:
tensor(0.)
tensor(0.)
----------------
tensor(0.)
tensor(0.)
----------------
tensor(0.)
tensor(0.)
----------------
tensor(0.)
tensor(0.)
----------------
tensor(0.)
tensor(0.)
----------------
tensor(0.)
tensor(0.)
----------------
tensor(0.)
tensor(0.)
----------------
tensor(0.)
tensor(0.)
----------------
tensor(0.)
tensor(0.)
----------------
tensor(0.)
tensor(0.)
----------------
tensor(0.)
tensor(0.)
----------------
But in
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.output_layer = nn.Linear(in_features=100, out_features=1, bias=True).to(device).requires_grad_(True)
def forward(self, X):
X = torch.flatten(X, start_dim=1, end_dim=-1)
output = self.output_layer(X)
output = torch.sigmoid(output)
return output
dataset = torch.rand(size=(1000, 100))
labels = torch.randint(low=0, high=2, size=(1000,)).type(torch.float)
I have below outputs:
tensor(0.0947)
tensor(0.1923)
----------------
tensor(0.0865)
tensor(0.1689)
----------------
tensor(0.0848)
tensor(0.1677)
----------------
tensor(0.0521)
tensor(0.1082)
----------------
tensor(0.0570)
tensor(0.1132)
----------------
tensor(0.0731)
tensor(0.1455)
----------------
I appreciate for any guidance.