PyTorch checkpoint giving poor results when loaded again after a few days

My PyTorch checkpoints give bad results when I save and then load them again like this:

torch.save(model, 'models/model_0.pth')
model = torch.load('models/model_0.pth')

import torch
import torch.nn.functional as F
from transformers import DistilBertModel

Creating the customized model by adding a dropout and a dense layer on top of DistilBERT to get the final output:

class DistilBERTClass(torch.nn.Module):
    def __init__(self):
        super(DistilBERTClass, self).__init__()
        self.l1 = DistilBertModel.from_pretrained("distilbert-base-uncased")

        # Unfreeze all DistilBERT layers
        for name, param in self.l1.named_parameters():
            param.requires_grad = True

        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.1)
        self.classifier = torch.nn.Linear(768, 26)

    def forward(self, input_ids, attention_mask, token_type_ids):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_1[0]
        # pooler = hidden_state[:, 0]  # CLS-token pooling (unused)
        pooler = torch.mean(hidden_state, dim=1)  # mean pooling over the sequence
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.Tanh()(pooler)
        pooler = self.dropout(pooler)
        output = self.classifier(pooler)
        output = torch.sigmoid(output)
        return output

model = DistilBERTClass()
model.to(device)
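
To make the expected tensor shapes explicit, here is a quick sanity check of the forward pass with dummy inputs (the batch size and sequence length below are arbitrary, and 30522 is the distilbert-base-uncased vocabulary size):

dummy_ids = torch.randint(0, 30522, (2, 16), device=device)               # random token ids within the vocab
dummy_mask = torch.ones(2, 16, dtype=torch.long, device=device)           # attend to every position
dummy_token_types = torch.zeros(2, 16, dtype=torch.long, device=device)   # unused by DistilBERT, kept for the signature
with torch.no_grad():
    probs = model(dummy_ids, dummy_mask, dummy_token_types)
print(probs.shape)  # expected: torch.Size([2, 26]), one sigmoid probability per label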

def save_checkpoint(model, optimizer, epoch, iteration, loss, filepath='checkpoint.pth'):
    checkpoint = {
        'epoch': epoch,
        'iteration': iteration,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss
    }
    torch.save(checkpoint, filepath)
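
For reference, the matching load side would look roughly like this (a sketch, not necessarily the right way; it assumes the model and optimizer are first re-created with the same classes and hyperparameters so the state dicts can be restored into them):

def load_checkpoint(model, optimizer, filepath='checkpoint.pth'):
    # Restore the saved state dicts into freshly constructed model/optimizer objects.
    checkpoint = torch.load(filepath, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return checkpoint['epoch'], checkpoint['iteration'], checkpoint['loss']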

def train(epoch):
    model.train()
    for _, data in tqdm(enumerate(training_loader, 0)):
        ids = data['ids'].to(device, dtype=torch.long)
        mask = data['mask'].to(device, dtype=torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
        targets = data['targets'].to(device, dtype=torch.float)

        outputs = model(ids, mask, token_type_ids)

        optimizer.zero_grad()
        loss = loss_fn(outputs, targets)
        if _ % 100 == 0:
            print(f'Epoch: {epoch}, Loss: {loss.item()}')

        loss.backward()
        optimizer.step()

        # Save a checkpoint every 230 iterations
        if _ % 230 == 0:
            save_checkpoint(model, optimizer, epoch, _, loss.item())

for epoch in range(EPOCHS):
    train(epoch)
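
Not shown above: loss_fn and optimizer are defined earlier in the notebook. For completeness, a sketch of what they could look like (the BCELoss choice matches the sigmoid output of the classifier, and the learning rate is only a placeholder):

loss_fn = torch.nn.BCELoss()                                    # multi-label targets with sigmoid outputs (assumed)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)       # placeholder learning rate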

def validation(testing_loader):
    model.eval()
    fin_targets = []
    fin_outputs = []
    with torch.no_grad():
        for _, data in tqdm(enumerate(testing_loader, 0)):
            ids = data['ids'].to(device, dtype=torch.long)
            mask = data['mask'].to(device, dtype=torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
            targets = data['targets'].to(device, dtype=torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(outputs.cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

outputs, targets = validation(testing_loader)
final_outputs = np.array(outputs) >= 0.5

val_hamming_loss = metrics.hamming_loss(targets, final_outputs)
### giving poor results

val_flat_accuracy_score = flat_accuracy(np.array(targets), np.array(final_outputs))
### giving poor results suddenly

## Please suggest the right way to load the model.
