Different output on running same test data and same model

asking28 · May 20, 2021, 1:05pm

I trained a model for sequence to sequence labeling purpose. After training, I saved using save_dict function. To test I load the model and its states and then turn it to eval mode using model.eval(). After that I run the same model on same set of input and get different results everytime. I am attaching code snippet and outputs for reference. Kindly help

selection_summ=SentenceSelectionSumm(emb_dim,vocab_size,conv_hidden,lstm_hidden,lstm_layer,bool(1),embedding_weights).to(device)
PATH="/group-volume/orc_srib/abhi3.singh/wikihow/model/extractor/ckpt-30000-4"
selection_summ.load_state_dict(torch.load(PATH))
selection_summ.eval()
for batch in val_loader:

    b=prepro_fn(50,10,batch)
    b_c=batch_converter(b)

    batchify_out=batchify(b_c)

    out=selection_summ.forward(batchify_out[0][0],batchify_out[0][1],batchify_out[1])
    print(out.size())
    print(batchify_out[1][0][1])
    print(torch.round(out.squeeze())[0])
    
    break

Output on 1st run

Output on 2nd run

My model function

class SentenceSelectionSumm(nn.Module):
    def __init__(self,emb_dim,vocab_size,conv_hidden,lstm_hidden,lstm_layer,bidirectional,embedding,dropout=0.2):
        super().__init__()
        self._sent_enc=ConvSentEncoder(vocab_size,emb_dim,conv_hidden,dropout,embedding)
        self.bi_lstm_enc=LSTMEncoder(3*conv_hidden,lstm_hidden,lstm_layer,dropout=dropout,bidirectional=bidirectional)
        self.lin_2=nn.Linear(10,1)
        self.lin_1=nn.Linear(self.bi_lstm_enc.hidden_size*(2 if self.bi_lstm_enc.bidirectional else 1),10)
    def forward(self,article_sents,sent_nums,target):
        enc_out=self._encode(article_sents,sent_nums)
        linear_sigmoid_out=self.linear_out(enc_out)
        return linear_sigmoid_out
    def _encode(self,article_sents,sent_nums):
        if sent_nums is None:
            enc_sent=self._sent_encode(article_sents[0]).unsqueeze(0)
        else:
            max_n=max(sent_nums)
            enc_sents=[self._sent_enc(art_sent) for art_sent in article_sents]
            def zero(n,device):
                z=torch.zeros(n,self.bi_lstm_enc.input_size).to(device)
                return z
            enc_sent=torch.stack([torch.cat([s,zero(max_n-n,s.device)],dim=0)
                                 if n!=max_n
                                 else s
                                 for s,n in zip(enc_sents,sent_nums)],
                                dim=0
                                )
            lstm_out=self.bi_lstm_enc(enc_sent,sent_nums)
            return lstm_out
    def linear_out(self,enc_out):
#         enc_out=enc_out.to(device)
        
        lin_1_out=self.lin_1(enc_out)
        
        lin_2_out=torch.sigmoid(self.lin_2(lin_1_out))
        return lin_2_out
def loss_fun(output,target,pad_idx=-1.0):
    assert output.size()[:-1]==target.size()
    target=target.view(output.size()).to(torch.float)
    mask=target!=-1.0
    masked_target=target.masked_select(mask)
    masked_out=output.masked_select(mask)
    loss=binary_cross_loss(masked_out,masked_target)
    return loss
def save_model(model,path,step,epoch):
    name='ckpt-{}-{}'.format(step,epoch)
    
    torch.save(model.state_dict(),join(path,name))

ptrblck · May 21, 2021, 9:01am

Could you remove the round operation and check the out tensor directly?
Since you might not use deterministic algorithms, it would be interesting to see if small numerical errors could move the rounded output.

asking28 · May 21, 2021, 9:24am

Thanks @ptrblck , for helping. I just figured out my mistake. I was using nn.Functional.Droput inplace of nn.Dropout where I have to exclusively set Dropout to False along with model.eval().