Problem predicting when forward depends on mini batch size

Intel_Novel · March 13, 2019, 10:31am

Hi, for a model like this:

CharNModel(
  (e): Embedding(66, 42)
  (l_in): Linear(in_features=42, out_features=256, bias=True)
  (l_hidden): Linear(in_features=256, out_features=256, bias=True)
  (l_out): Linear(in_features=256, out_features=66, bias=True)
)

and a forward method like this:

    def forward(self, *cs):
        """Run the characters in ``cs`` through the network, one per step.

        Each ``c`` is embedded with ``self.e`` and projected by ``self.l_in``.
        The first step builds the hidden state from that projection alone;
        every later step adds the running hidden state to the projection
        before applying ``self.l_hidden``. No hidden state is pre-allocated,
        so nothing here reads a mini-batch size.

        Returns the log-softmax (over dim 0) of ``self.l_out`` applied to the
        final hidden state.
        """
        for step, c in enumerate(cs):
            projected = torch.relu(self.l_in(self.e(c)))
            if step == 0:
                # No previous state yet: seed it from the first character.
                hidden = torch.tanh(self.l_hidden(projected))
            else:
                hidden = torch.tanh(self.l_hidden(hidden + projected))

        return torch.log_softmax(self.l_out(hidden), dim=0)

I get nice predictions with this code:

m.eval()
torch.manual_seed(0)
np.random.seed(0)
bptt = 8


def get_next(inp):
    """Return the entry of ``chars`` that the model ``m`` scores highest after ``inp``."""
    # Index of every character; "PROLOGUE" gives [28 30 27 24 27 19 33 17]
    char_ids = np.array([char_indices[ch] for ch in inp])

    # Move the indices to the GPU, then split them into one 0-dim tensor per
    # character: (tensor(28, device='cuda:0'), tensor(30, device='cuda:0'), ...)
    per_char = torch.unbind(torch.from_numpy(char_ids).cuda(), dim=-1)

    # scores is a tensor of 66 predictions, torch.Size([66])
    scores = m(*per_char)

    # Position of the largest score, printed before it is used as the lookup key.
    _, best = scores.max(0)
    i = best.cpu().numpy()
    print(i)
    return chars[i]


inp = "PROLOGUE"
# in this case returns '\n' char
get_next(inp)

The predicted char \n has index 2 (value -1.8741) in the tensor p, as shown:

tensor([-5.5174, -4.7152, -1.8741, -5.7296, -6.0786, -5.8575, -5.5922, -5.5996,
        -5.1968, -5.5493, -5.8919, -5.5281, -6.1660, -3.7813, -5.1541, -3.7925,
        -4.6047, -2.7959, -4.8874, -4.0734, -5.1603, -3.3921, -4.6959, -4.9327,
        -3.0011, -3.7672, -2.3908, -1.9711, -4.4944, -4.8782, -3.4857, -4.2912,
        -2.2588, -3.6031, -4.1063, -5.0520, -4.1632, -5.2944, -5.8489, -5.4463,
        -6.3238, -6.7760, -6.9522, -6.4519, -5.6051, -7.1628, -6.6192, -6.2261,
        -6.8283, -5.9125, -6.9436, -7.1909, -7.4356, -7.1347, -6.3019, -6.5737,
        -5.3044, -6.4420, -7.5483, -6.8447, -5.8424, -6.8067, -6.7659, -4.8436,
        -6.3814, -6.4360], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
torch.Size([66])

However, if I change forward so that it depends on the mini-batch size:

    def forward(self, *cs):
        """Run the characters in ``cs`` through the network from a zero hidden state.

        The hidden state is allocated as ``(mbs, n_hidden)`` zeros on "cuda",
        where ``mbs`` is the size of the first input's dim 0 and ``n_hidden``
        is a name defined outside this method. Each step adds the projected
        embedding of ``c`` to the hidden state and applies ``self.l_hidden``.

        Returns the log-softmax (over dim 0) of ``self.l_out`` applied to the
        final hidden state.
        """
                    
        # The batch size comes from the first input, so that input needs at
        # least one dimension; size(0) raises on a 0-dim tensor.
        mbs = cs[0].size(0)
        h = torch.zeros(mbs, n_hidden).to("cuda")

        for c in cs:
            inp = torch.relu(self.l_in(self.e(c)))            
            h = torch.tanh(self.l_hidden(h+inp))

        # NOTE(review): h is (mbs, n_hidden) here, so the l_out result is 2-D
        # and dim=0 normalises across the batch axis rather than across the
        # output features. With mbs == 1 every column holds a single value,
        # whose log-softmax is log(1) = 0, which would give an all-zero
        # result. dim=-1 (or dim=1) looks like the intended axis -- confirm,
        # and check that training normalised over the same axis.
        return torch.log_softmax(self.l_out(h), dim=0)

then my prediction code looks like this:

m.eval()
torch.manual_seed(0)
np.random.seed(0)

# how many characters to predict
# NOTE(review): bptt is not referenced anywhere else in this snippet.
bptt = 8

# predict next character
def get_next(inp): 
    """Return the entry of ``chars`` selected from the output of ``m`` for ``inp``."""

    # Map each character to its index and move the index tensor to the GPU.
    idxs = np.array([char_indices[c] for c in inp ])
    t = torch.from_numpy(idxs).cuda() #tensor([30, 27, 24, 27, 19, 33, 17,  3], device='cuda:0')
    # Split into one 0-dim tensor per character.
    unb = torch.unbind(t, dim=-1)

    # set for each element e dimension 1, (was 0)
    # else it will fail in forward
    # (unsqueeze_ works in place, so the elements of unb themselves change)
    for e in unb:
        if(e.dim()==0):
            e.unsqueeze_(0)
            
    p = m(*unb)
    # grab the index of the max element
    # NOTE(review): if p has a leading batch axis of size 1 (e.g. [1, 66]),
    # max(0) reduces that axis and returns one index per column, all 0,
    # instead of a single character index. Reducing over the last axis
    # (p.max(-1)) is presumably what is wanted -- confirm.
    i = p.max(0)[1].cpu().numpy()
    return chars[i]
    
inp = "PROLOGUE"
get_next(inp)

But the prediction tensor p is now all zeros:

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='cuda:0', grad_fn=<LogSoftmaxBackward>)
torch.Size([1, 66])

I don’t get it. Why is it all zeros? The drop in loss during the training phase tells me the model trained OK.

Intel_Novel · March 13, 2019, 10:53am

The reason I added this code:

# Give every 0-dim element of unb a leading dimension, in place.
for e in unb:
        if(e.dim()==0):
            e.unsqueeze_(0)

is that without it I get this error:

     25     def forward(self, *cs):
     26 
---> 27         mbs = cs[0].size(0)
     28         h = torch.zeros(mbs, n_hidden).to("cuda")
     29 

RuntimeError: dimension specified as 0 but tensor has no dimensions