I’m trying to train a model composed by a CNN and a LSTM but during the training phase there are no weights update. I have read posts in this forum for days but I cannot figure out what is wrong with my code.
Here it is my model:
from coral_pytorch.layers import CoralLayer
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
dr_rate= 0.2
pretrained = True
rnn_hidden_size = 30
rnn_num_layers = 2
#get a pretrained vgg19 model ( taking only the cnn layers and fine tun them)
baseModel = models.vgg19(pretrained=pretrained).features
i = 0
for child in baseModel.children():
if i < 28:
for param in child.parameters():
param.requires_grad = False
else:
for param in child.parameters():
param.requires_grad = True
i +=1
num_features = 25088
self.baseModel = baseModel
self.dropout= nn.Dropout(dr_rate)
self.rnn = nn.LSTM(num_features, rnn_hidden_size, rnn_num_layers , batch_first=True)
self.fc1 = CoralLayer(size_in=30, num_classes=5)
def forward(self, x):
batch_size, time_steps, C, H, W = x.size()
# reshape input to be (batch_size * timesteps, input_size)
x = x.contiguous().view(batch_size * time_steps, C, H, W)
x = self.baseModel(x)
x = x.view(x.size(0), -1)
#make output as ( samples, timesteps, output_size)
x = x.contiguous().view(batch_size , time_steps , x.size(-1))
x , (hn, cn) = self.rnn(x)
##### Use CORAL layer #####
logits1 = self.fc1(x[:, -1, :])
probas1 = torch.sigmoid(logits1)
###--------------------------------------------------------------------###
return logits1, probas1
And that’s the training code:
model = Net().to(device)
optimizer = optim.Adam(model.parameters(),lr=0.001)
num_epochs = 10
for epoch in range(num_epochs):
model = model.train()
for batch_idx, (features, label1) in enumerate(train_dataloader):
##### Convert class labels for CORAL
levels1 = levels_from_labelbatch(label1, num_classes=5).to(device)
features = features.to(device)
logits1, probas1 = model(features)
#### CORAL loss
loss = coral_loss(logits1, levels1)
#print(logits1.dtype, levels1.dtype, loss.dtype) # they are all tensors
optimizer.zero_grad()
a = list(model.parameters())[1].clone()
loss.backward(retain_graph=True)
optimizer.step()
b = list(model.parameters())[1].clone()
print(torch.equal(a,b))
print(list(model.parameters())[0].grad)
### LOGGING
if not batch_idx % batch_size:
print ('Epoch: %02d/%02d | Batch %02d/%02d | Loss: %.4f'
%(epoch+1, num_epochs, batch_idx,
len(train_dataloader), loss))
A partial output:
Equals : True
Grad : None
Epoch: 10/10 | Batch 00/03 | Loss: 2.4990
The input tensor has a shape of (2, 10, 3, 224, 224) corresponding to batch_sizeXframesXchannelsXheightXwidth. I’m also using a custom loss and layer from this work: GitHub - Raschka-research-group/coral-pytorch: CORAL and CORN implementations for ordinal regression with deep neural networks.
I will appreciate any advice you can give me!
Additional info: I have used Coral with a MLP using the same training and it updates the weights so I think that it is a model problem