Hi, for the model like this:
CharNModel(
(e): Embedding(66, 42)
(l_in): Linear(in_features=42, out_features=256, bias=True)
(l_hidden): Linear(in_features=256, out_features=256, bias=True)
(l_out): Linear(in_features=256, out_features=66, bias=True)
)
and forward like this:
def forward(self, *cs):
# in here forward do not depend on mini batch size
one=True # just set h once without previous state
for c in cs:
inp = torch.relu(self.l_in(self.e(c)))
if (one==True): h = torch.tanh(self.l_hidden(inp)); one=False
else:h = torch.tanh(self.l_hidden(h+inp))
return torch.log_softmax(self.l_out(h), dim=0)
I get nice predictions with this code:
m.eval()
torch.manual_seed(0)
np.random.seed(0)
bptt = 8
def get_next(inp):
# inp = inp[-bptt:] # P R O L O G U E
idxs = np.array([char_indices[c] for c in inp ]) #[28 30 27 24 27 19 33 17]
# convert to tensor
t = torch.from_numpy(idxs).cuda() # tensor([28, 30, 27, 24, 27, 19, 33, 17], device='cuda:0')
# make single tensor per char
unb = torch.unbind(t, dim=-1) # (tensor(28, device='cuda:0'), tensor(30, device='cuda:0'), ...
# p is tenosor of 66 predictions torch.Size([66])
p = m(*unb)
i = p.max(0)[1].cpu().numpy()
print(i)
return chars[i]
inp = "PROLOGUE"
# in this case returns '\n' char
get_next(inp)
The predicted char \n
has index 2
(value -1.8741
) in the tensor p
: as shown:
tensor([-5.5174, -4.7152, -1.8741, -5.7296, -6.0786, -5.8575, -5.5922, -5.5996,
-5.1968, -5.5493, -5.8919, -5.5281, -6.1660, -3.7813, -5.1541, -3.7925,
-4.6047, -2.7959, -4.8874, -4.0734, -5.1603, -3.3921, -4.6959, -4.9327,
-3.0011, -3.7672, -2.3908, -1.9711, -4.4944, -4.8782, -3.4857, -4.2912,
-2.2588, -3.6031, -4.1063, -5.0520, -4.1632, -5.2944, -5.8489, -5.4463,
-6.3238, -6.7760, -6.9522, -6.4519, -5.6051, -7.1628, -6.6192, -6.2261,
-6.8283, -5.9125, -6.9436, -7.1909, -7.4356, -7.1347, -6.3019, -6.5737,
-5.3044, -6.4420, -7.5483, -6.8447, -5.8424, -6.8067, -6.7659, -4.8436,
-6.3814, -6.4360], device='cuda:0', grad_fn=<LogSoftmaxBackward>)
torch.Size([66])
However, if I set forward to depend on mini batch size:
def forward(self, *cs):
mbs = cs[0].size(0)
h = torch.zeros(mbs, n_hidden).to("cuda")
for c in cs:
inp = torch.relu(self.l_in(self.e(c)))
h = torch.tanh(self.l_hidden(h+inp))
return torch.log_softmax(self.l_out(h), dim=0)
then my prediction code looks like this:
m.eval()
torch.manual_seed(0)
np.random.seed(0)
# how many characters to predict
bptt = 8
# predict next character
def get_next(inp):
idxs = np.array([char_indices[c] for c in inp ])
t = torch.from_numpy(idxs).cuda() #tensor([30, 27, 24, 27, 19, 33, 17, 3], device='cuda:0')
unb = torch.unbind(t, dim=-1)
# set for each element e dimension 1, (was 0)
# else it will fail in forward
for e in unb:
if(e.dim()==0):
e.unsqueeze_(0)
p = m(*unb)
# grab the index of the max element
i = p.max(0)[1].cpu().numpy()
return chars[i]
inp = "PROLOGUE"
get_next(inp)
But the prediction tensor p
is now all zeros:
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
device='cuda:0', grad_fn=<LogSoftmaxBackward>)
torch.Size([1, 66])
I don’t get it. Why it is all zeros? The loss drop in train phase shows me I trained the model OK.