Getting different results after loading the saved model

Basu_Jindal · November 18, 2021, 5:53pm

A few days ago, I saved a model using torch.save(model, PATH) and uploaded it to Google Drive. Now, when I load the model, I get different results. Before saving, I was getting a Chamfer Distance of 0.0017 (the metric I am using for training; lower is better), but now I get around 0.0350 if I call model.eval() before running and 0.0200 if I call model.train().

Here’s my code:

class Transform(nn.Module):
    """Encode a set of per-point features into a single 1024-d vector.

    Four kernel-size-1 ``Conv1d`` layers are applied in sequence
    (512 -> 512 -> 512 -> 512 -> 1024 channels). Each of the first three is
    followed by ``BatchNorm1d`` and ReLU; the output of the fourth is
    max-pooled over its entire last dimension and flattened.
    """

    def __init__(self):
        super().__init__()

        # Layers are created in a fixed order (convs first, then norms) so
        # that seeded parameter initialisation is reproducible.
        # conv1 consumes 128*3 + 128 = 512 input channels.
        self.conv1 = nn.Conv1d(in_channels=128 * 3 + 128, out_channels=512, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=512, out_channels=512, kernel_size=1)
        self.conv3 = nn.Conv1d(in_channels=512, out_channels=512, kernel_size=1)
        self.conv4 = nn.Conv1d(in_channels=512, out_channels=1024, kernel_size=1)

        # One normalisation layer per hidden convolution; conv4 has none.
        self.bn1 = nn.BatchNorm1d(num_features=512)
        self.bn2 = nn.BatchNorm1d(num_features=512)
        self.bn3 = nn.BatchNorm1d(num_features=512)

    def forward(self, inp_global):
        """Return the pooled encoding of ``inp_global``.

        Args:
            inp_global: tensor whose channel dimension is 512 (required by
                ``conv1``), e.g. ``(batch, 512, length)``.

        Returns:
            Tensor of shape ``(batch, 1024)`` for a ``(batch, 512, length)``
            input.
        """
        hidden_stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )

        features = inp_global
        for conv, norm in hidden_stages:
            features = F.relu(norm(conv(features)))

        # The last convolution is pooled directly, without norm or ReLU.
        features = self.conv4(features)

        # A pooling window as wide as the last dimension leaves length 1.
        pooled = F.max_pool1d(features, kernel_size=features.size(-1))
        return pooled.flatten(1)


class GlobalEncode(nn.Module):
    """Summarise a 3-channel point set as a single 128-d feature vector.

    The channel widths go 3 -> 64 -> 64 -> 128 -> 128 through kernel-size-1
    ``Conv1d`` layers. BatchNorm and ReLU follow the first three layers; the
    fourth layer's output is max-pooled across its last dimension and
    flattened.
    """

    def __init__(self):
        super().__init__()

        # Convolutions are instantiated before the norms, in numeric order,
        # which fixes the order of random parameter initialisation.
        self.conv1 = nn.Conv1d(in_channels=3, out_channels=64, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=64, kernel_size=1)
        self.conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=1)
        self.conv4 = nn.Conv1d(in_channels=128, out_channels=128, kernel_size=1)

        # conv4 is not paired with a normalisation layer.
        self.bn1 = nn.BatchNorm1d(num_features=64)
        self.bn2 = nn.BatchNorm1d(num_features=64)
        self.bn3 = nn.BatchNorm1d(num_features=128)

    def forward(self, input):
        """Return the pooled global feature of ``input``.

        Args:
            input: tensor with 3 channels (required by ``conv1``), e.g.
                ``(batch, 3, num_points)``.

        Returns:
            Tensor of shape ``(batch, 128)`` for a ``(batch, 3, num_points)``
            input.
        """
        convs = (self.conv1, self.conv2, self.conv3)
        norms = (self.bn1, self.bn2, self.bn3)

        activations = input
        for conv, norm in zip(convs, norms):
            activations = conv(activations)
            activations = F.relu(norm(activations))

        # Final projection is pooled as-is (no norm, no activation).
        projected = self.conv4(activations)

        # Max over every position: the pooling window spans the last dim.
        pooled = F.max_pool1d(projected, kernel_size=projected.size(-1))
        return pooled.flatten(1)

class PointCloud(nn.Module):
    """Encode a point cloud, then decode a point set and an 8-d label vector.

    ``forward`` builds a 1024-d encoding from the raw points and their
    per-point neighbourhood features, then feeds it to two heads:

    * a grouped-convolution decoder with ``nb_heads`` groups, whose output is
      reshaped to ``(batch, 256 * nb_heads, 3)``;
    * a fully connected stack (1024 -> 512 -> 256 -> 128 -> 8) with dropout
      after its first two layers.
    """

    def __init__(self):
        super().__init__()
        self.encode = Transform()
        self.global_encode = GlobalEncode()

        # Number of groups in the decoder convolutions. forward() also uses
        # it to tile the encoding, giving every group its own 1024-d copy.
        self.nb_heads = 8
        self.conv1 = nn.Conv1d(in_channels=1024*self.nb_heads,
                               out_channels=1024*self.nb_heads, kernel_size=1,
                               groups=self.nb_heads)
        self.conv2 = nn.Conv1d(in_channels=1024*self.nb_heads,
                               out_channels=1024*self.nb_heads, kernel_size=1,
                               groups=self.nb_heads)
        self.conv3 = nn.Conv1d(in_channels=1024*self.nb_heads,
                               out_channels=256*3*self.nb_heads, kernel_size=1,
                               groups=self.nb_heads)
        self.conv4 = nn.Conv1d(in_channels=256*3*self.nb_heads,
                               out_channels=256*3*self.nb_heads, kernel_size=1,
                               groups=self.nb_heads)

        # NOTE(review): bn1-bn3 and dp are never applied in forward(). They
        # are kept so that state_dict keys and previously pickled models stay
        # loadable; confirm whether leaving them unused is intended.
        self.bn1 = nn.BatchNorm1d(1024*self.nb_heads)
        self.bn2 = nn.BatchNorm1d(1024*self.nb_heads)
        self.bn3 = nn.BatchNorm1d(256*3*self.nb_heads)
        self.dp = nn.Dropout(p=0.2)

        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 8)
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, input, input_knn):
        """Run the encoder and both heads.

        Args:
            input: raw points passed to ``GlobalEncode``, which requires 3
                channels, e.g. ``(batch, 3, num_points)``.
            input_knn: per-point features of shape
                ``(batch, num_points, 384)``. The last dimension must be 384
                so that, concatenated with the 128-d global feature, it
                matches the 512 input channels of ``Transform``.

        Returns:
            A tuple ``(output, labels)`` where ``output`` has shape
            ``(batch, 256 * nb_heads, 3)`` and ``labels`` has shape
            ``(batch, 8)``.
        """
        # Take the point count from the data instead of assuming 2048; the
        # encoder max-pools over points, so any count is valid.
        num_points = input_knn.size(1)

        # (batch, 128) -> (batch, 1, 128) -> (batch, num_points, 128).
        # expand() broadcasts without copying; cat() materialises the result.
        inp_global = self.global_encode(input).unsqueeze(1)
        inp_global = inp_global.expand(-1, num_points, -1)
        xb = torch.cat((input_knn, inp_global), dim=2).transpose(1, 2)
        enc = self.encode(xb)

        # Tile the encoding once per group: (batch, 1024 * nb_heads, 1).
        xb = enc.repeat(1, self.nb_heads).unsqueeze(2)
        xb = F.relu(self.conv1(xb))
        xb = F.relu(self.conv2(xb))
        xb = F.relu(self.conv3(xb))
        xb = self.conv4(xb)
        # conv4 emits 3 values per output point.
        output = xb.reshape(-1, self.conv4.out_channels // 3, 3)

        xb = self.dropout(F.relu(self.fc1(enc)))
        xb = self.dropout(F.relu(self.fc2(xb)))
        xb = F.relu(self.fc3(xb))
        labels = self.fc4(xb)

        return output, labels

ptrblck · November 19, 2021, 7:24am

Directly saving the model is generally not recommended as loading it could easily break if you’ve changed the code in the meantime.
I don’t know what might have caused the different outputs, but did you store the state_dict by chance and could reload it?

Basu_Jindal · November 19, 2021, 8:45am

Thank you for the reply.
I didn’t save the state_dict. Moreover, even when using model.eval(), I get different results if I shuffle the dataset during validation. Could it be because of BatchNorm or Dropout?

ptrblck · November 19, 2021, 9:52am

That shouldn’t be the case. Could you post the input shapes you are using, which would reproduce the different outputs even after calling model.eval()?