Problem with DataLoader usage in an LSTM

I used a DataLoader to produce batches (e.g. batch_size = 512) with seq_len = 11, but when a batch is fed to my model during training, the batch size and sequence length appear swapped. Any help is appreciated.

Here is my model

class lstm(nn.Module):
    """Character-level LSTM classifier: embedding -> LSTM -> linear -> log-softmax.

    NOTE(review): this class reads the globals ``bs`` (batch size) and
    ``n_fac`` (embedding width) from the enclosing script — confirm they
    are defined before instantiation.
    """

    def __init__(self, vocab_size, hidden_dim, n_classes):
        # BUG FIX: nn.Module.__init__ must run before any submodule is
        # assigned; without it, assigning nn.Embedding/nn.LSTM/nn.Linear
        # below raises an AttributeError (no _modules registry).
        super().__init__()
        self.n_classes = n_classes
        self.hidden_dim = hidden_dim
        # Initial (h, c) state. NOTE(review): forward() never passes this
        # to the LSTM, so the LSTM falls back to zero state on every call.
        self.hidden = self.init_hidden(bs)
        self.e = nn.Embedding(vocab_size, n_fac)
        # NOTE(review): nn.LSTM defaults to seq-first input (seq, batch,
        # input); if the DataLoader yields batch-first tensors, pass
        # batch_first=True here instead of reshaping in forward().
        self.lstm = nn.LSTM(n_fac, hidden_dim)
        self.out = nn.Linear(hidden_dim, n_classes)

    def forward(self, cs):
        # NOTE(review): cs[0].size(0) is the length of the FIRST SAMPLE,
        # i.e. seq_len when the DataLoader yields batch-first data — this
        # is the source of the reported bs/seq_len swap. The true batch
        # size of a batch-first tensor is cs.size(0).
        bs = cs[0].size(0)
        print('bs: ', bs)

        e = self.e(cs)  # -> (*cs.shape, n_fac)
        # Reshape for the seq-first LSTM; see the NOTE in __init__ about
        # batch_first=True as the cleaner fix.
        out_lstm, h = self.lstm(e.view(bs, -1, n_fac))
        # out_lstm: (seq, batch, hidden); nn.Linear applies over the last dim.
        out = self.out(out_lstm)
        return F.log_softmax(out, dim=-1)

    def init_hidden(self, bs):
        """Return a zeroed (h0, c0) pair for a single-layer LSTM."""
        return (autograd.Variable(torch.zeros(1, bs, self.hidden_dim)),
                autograd.Variable(torch.zeros(1, bs, self.hidden_dim)))
def nll_loss_seq(inp, targ):
    """Negative log-likelihood loss over a whole sequence batch.

    Expects ``inp`` as seq-first log-probabilities of shape
    (seq_len, batch, n_classes) and ``targ`` as batch-first class indices
    of shape (batch, seq_len). Both are flattened into matching
    (seq * batch) order before applying ``F.nll_loss``.
    """
    n_classes = inp.size(2)
    # Targets arrive batch-first; move to seq-first so their flattened
    # order lines up with inp's (seq, batch) layout.
    flat_targ = targ.transpose(0, 1).contiguous().view(-1)
    flat_inp = inp.view(-1, n_classes)
    return F.nll_loss(flat_inp, flat_targ)

# Training/validation driver.
# NOTE(review): relies on names defined elsewhere in the script
# (num_epochs, train_loader, val_loader, model, optimizer, Variable) —
# confirm they exist before running.
iter = 0  # NOTE(review): unused below and shadows the builtin `iter`
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    for phase in ['train', 'val']:
        # BUG FIX: the original assigned both loaders unconditionally
        # (no `else`), so every phase silently used the validation loader.
        if phase == 'train':
            loader = train_loader
        else:
            loader = val_loader
        dataset_size = loader.dataset.len
        running_loss = 0.0
        running_corrects = 0
        # BUG FIX: iterate the loader selected above (the original looped
        # over an undefined `dataloader`) and unpack the targets too (the
        # original referenced `labels` without ever assigning it).
        for inputs, labels in loader:
            inputs = Variable(inputs)
            labels = Variable(labels)
            bs = inputs.size(0)  # BUG FIX: batch size, not the tensor itself

            outputs = model(inputs)
            loss = nll_loss_seq(outputs, labels)

            if phase == 'train':
                # BUG FIX: the original `if` body was empty/mis-indented
                # (a syntax error); without these steps nothing is learned.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # BUG FIX: accumulate the scalar loss weighted by batch size;
            # `running_loss += [0]*inputs.size(0)` tried to add a list
            # to a float and would raise a TypeError.
            running_loss += loss.item() * bs

        epoch_loss = running_loss / dataset_size
        print('{} Loss: {:.4f}'.format(phase, epoch_loss))
1 Like

The LSTM normally expects the batch dimension to come second, but I guess your dataloader is providing data with the batch dimension first.

If I have understood the problem correctly, this should fix it:

self.lstm = nn.LSTM(n_fac,hidden_dim, batch_first=True)
1 Like