Hi there,
I have run into a problem: I am trying to train a network, but only some of its parameters are updated, and I don't know why.
Here is my code.
class Model(nn.Module):
    """Score a (context, response) pair with two GRU encoders and a bilinear form.

    Both token sequences share one embedding table. Each sequence is encoded
    by its own single-layer GRU, and the final hidden states ``c`` (context)
    and ``r`` (response) are combined as ``sigmoid((W c + b) . r)``.

    Args:
        input_word_size: Number of rows in the embedding table.
        embedding_size: Width of each embedding vector (GRU input size).
        hidden_size: GRU hidden size; also the in/out size of ``self.w``.
        padding_index: Passed to ``nn.Embedding`` as ``padding_idx``.
        embedding: Optional weights that replace the embedding table. May be
            an ``nn.Parameter`` (used as-is) or a plain tensor (wrapped in a
            new ``nn.Parameter``).
        use_cuda: Stored on the instance for backward compatibility.
            ``forward`` no longer consults it, because the initial hidden
            states are allocated on the same device as the embedded inputs.
    """

    def __init__(self, input_word_size, embedding_size, hidden_size,
                 padding_index, embedding=None, use_cuda=False):
        super(Model, self).__init__()
        self.input_word_size = input_word_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.use_cuda = use_cuda

        self.embedding = nn.Embedding(
            input_word_size, embedding_size, padding_idx=padding_index
        )
        if embedding is not None:
            # nn.Module refuses to bind a plain tensor to a registered
            # parameter name, so wrap anything that is not already a
            # Parameter before assigning it.
            if not isinstance(embedding, nn.Parameter):
                embedding = nn.Parameter(embedding)
            self.embedding.weight = embedding

        self.context_gru = nn.GRU(embedding_size, hidden_size, batch_first=True)
        self.response_gru = nn.GRU(embedding_size, hidden_size, batch_first=True)
        self.w = nn.Linear(hidden_size, hidden_size)

    def forward(self, x, y):
        """Return the match score for each (context, response) pair.

        Args:
            x: Context token indices, shape ``(batch_size, len)``.
            y: Response token indices, shape ``(batch_size, len)``.

        Returns:
            Tensor of shape ``(batch_size, 1, 1)`` with values in ``(0, 1)``.
        """
        x_emb = self.embedding(x)  # (batch_size, len, embedding_size)
        y_emb = self.embedding(y)  # (batch_size, len, embedding_size)

        # Initial hidden states: (num_layers=1, batch_size, hidden_size).
        # Each one is sized from its own input and created on that input's
        # device/dtype, so the module works wherever it has been moved.
        # NOTE(review): the initial state is random noise on every call, so
        # the output is not deterministic even in eval mode -- confirm this
        # is intended (omitting h_0 makes nn.GRU start from zeros).
        x_hidden = torch.randn(
            1, x.shape[0], self.hidden_size,
            device=x_emb.device, dtype=x_emb.dtype,
        )
        y_hidden = torch.randn(
            1, y.shape[0], self.hidden_size,
            device=y_emb.device, dtype=y_emb.dtype,
        )

        # Only the final hidden state of each encoder is used.
        _, x_hidden = self.context_gru(x_emb, x_hidden)
        _, y_hidden = self.response_gru(y_emb, y_hidden)

        context = x_hidden.transpose(0, 1)                   # (batch_size, 1, hidden_size)
        response = y_hidden.transpose(0, 1).transpose(1, 2)  # (batch_size, hidden_size, 1)

        # Bilinear score: (W c + b) . r, squashed to a probability.
        result = self.w(context)
        result = torch.bmm(result, response)                 # (batch_size, 1, 1)
        return torch.sigmoid(result)
After that, I run the following training step:
# Snapshot the weight of the linear layer before the update so it can be
# compared with its value after the optimizer step.
a = model.w.weight.clone()
optimizer.zero_grad()
# Model.forward accepts only (x, y); the label belongs to the loss, not the model.
result = model(x, y)
# NOTE(review): the model returns shape (batch_size, 1, 1); BCELoss expects
# `label` to have the same shape -- confirm against the caller.
loss = loss_function(result, label)  # loss function is BCELoss
loss.backward()
optimizer.step()
b = model.w.weight.clone()
print(torch.equal(a, b))  # I got True here, which means the weight has not changed
# and the value of model.w.weight.grad is not None
What should I do to fix this? Should I change my network?