It is very strange that my model automatically splits the input sequence and calls self.lstm() in forward() multiple times.
This is my model:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

input_dim = 2
output_dim = 10
target_dim = 10

class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, output_dim, target_dim):
        super(LSTMClassifier, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.target_dim = target_dim
        # nn.LSTM defaults to batch_first=False: input is (seq_len, batch, input_dim)
        self.lstm = nn.LSTM(self.input_dim, self.output_dim)
        self.out2score = nn.Linear(self.output_dim, self.target_dim)

    def forward(self, input):
        _, (h_n, c_n) = self.lstm(input)   # keep only the final hidden state
        lstm_out = h_n.squeeze()           # (1, batch, output_dim) -> (batch, output_dim)
        score = self.out2score(lstm_out)
        outclass = F.log_softmax(score, dim=1)
        return outclass
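For reference, without any GPU/DataParallel wrapping the shapes come out the way I expect. Here is a minimal CPU sanity check with random placeholder data:

# minimal CPU check (random data, no DataParallel)
x = torch.randn(6, 10, 2)                  # (seq_len=6, batch=10, input_dim=2)
model = LSTMClassifier(input_dim, output_dim, target_dim)
print(model(x).shape)                      # torch.Size([10, 10]): one row per batch element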
I am also using GPUs:
#define a net
net = LSTMClassifier(input_dim, output_dim, target_dim)
loss_function = nn.NLLLoss()
optimizer = optim.Adam(net.parameters(), lr=0.01)
# using GPUs
if torch.cuda.is_available():
    print("cuda is available...")
    if torch.cuda.device_count() > 1:
        print('using %d Gpus' % torch.cuda.device_count())
        net = nn.DataParallel(net)
    net.cuda()
else:
    print('cuda disabled')
When I call net() with data of shape [6, 10, 2], where 6 is the sequence length for the LSTM, I see in the VS Code debug console that self.lstm() is called three times, each time with a sequence length of 2. So the final output size becomes [30, 10] instead of the [10, 10] that I want.
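Concretely, the call looks like this (sketched with random data; the printed shapes are what I observe on my machine):

data = torch.randn(6, 10, 2).cuda()   # (seq_len=6, batch=10, input_dim=2)
out = net(data)
print(out.shape)                      # got torch.Size([30, 10]); expected [10, 10]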
Any suggestions? Thanks!