The same network loaded by two different functions is not equivalent, why?

Capo_Mestre · August 31, 2021, 12:23pm

Hello,
I use two different custom functions to load the network parameters. I load the network into two different variables ‘network1’ and ‘network2’.
When I compare them as network1 == network2, the result is False even though the state dictionaries seem to coincide (although I haven't checked every single entry). Is this the expected behaviour?

These are the functions for saving and loading the network state dictionaries (one pair of functions is save_network() and load_network(), and the other pair is save_network_params() and load_network_params()).

def save_network(self, model, network_parameters, input_depth, conv_layer_activations, linear_layer_activations, comment):
        """Save the model weights together with its constructor arguments.

        The checkpoint is a plain dict holding every entry of
        ``network_parameters`` plus ``"input_depth"``,
        ``"conv_activation_functions"``, ``"fc_activation_functions"`` and
        ``"state_dict"``. It is written to
        ``<self.path_params>/_r<run_count>_e<epoch_count>.pth``.

        Args:
            model: Object exposing ``state_dict()``.
            network_parameters: Architecture settings to store; copied, so
                the caller's object is not modified.
            input_depth: Stored under ``"input_depth"``.
            conv_layer_activations: Stored under
                ``"conv_activation_functions"``.
            linear_layer_activations: Stored under
                ``"fc_activation_functions"``.
            comment: Accepted but not used by this method.

        Returns:
            ``self.subfolder``.
        """
        # Shallow copy first, then add the extra entries on top of it.
        bundle = dict(network_parameters)
        bundle["input_depth"] = input_depth
        bundle["conv_activation_functions"] = conv_layer_activations
        bundle["fc_activation_functions"] = linear_layer_activations
        bundle["state_dict"] = model.state_dict()

        # Two blank lines, then dump the weights that are about to be saved.
        print()
        print()
        print(bundle["state_dict"])

        target_name = "".join(
            ["_r", str(self.run_count), "_e", str(self.epoch_count), ".pth"]
        )
        torch.save(bundle, os.path.join(self.path_params, target_name))
        return self.subfolder
    
    def load_network(self, date_, run, epoch):
        """Rebuild a ``Network3`` from a checkpoint dict and load its weights.

        The checkpoint is expected to contain the constructor arguments of
        ``Network3`` under the keys read below, plus a ``"state_dict"`` entry.

        Args:
            date_: Name of the subfolder of ``self.params_root`` to read from.
            run: Run number of the checkpoint to load.
            epoch: Epoch number of the checkpoint to load.

        Returns:
            The newly constructed model. ``eval()`` is NOT called here, so
            the caller must switch modes itself if needed.

        Raises:
            FileNotFoundError: If no listed file matches ``(run, epoch)``.
        """
        print("Available subfolders:")
        print(np.transpose(next(os.walk(self.params_root))[1]))
        print()

        # Used as parallel sequences: run_epoch[i] is the (run, epoch) pair
        # belonging to filenames[i].
        run_epoch, filenames = self.retrieve_run_epoch_from_filenames(date_)

        print("Files in " + "[" + date_ + "]")
        print(filenames)

        # Find the index of the matching file name. Without this explicit
        # check a missing checkpoint would surface as an UnboundLocalError.
        matches = [
            row
            for row, sublist in enumerate(run_epoch)
            if sublist[0] == run and sublist[1] == epoch
        ]
        if not matches:
            raise FileNotFoundError(
                "No checkpoint for run={} epoch={} in [{}]".format(run, epoch, date_)
            )
        # If several entries match, the last one is used.
        filename = filenames[matches[-1]]

        folder = os.path.join(self.params_root, date_)
        checkpoint = torch.load(os.path.join(folder, filename))

        # Positional order must follow the Network3 constructor signature.
        model = Network3(
            checkpoint["input_depth"],
            checkpoint["conv_layer_out_channels"],
            checkpoint["conv_layer_kernel_sizes"],
            checkpoint["conv_layer_strides"],
            checkpoint["conv_layer_paddings"],
            checkpoint["max_pool_layer_numbers"],
            checkpoint["max_pool_kernel_sizes"],
            checkpoint["max_pool_strides"],
            checkpoint["max_pool_paddings"],
            checkpoint["batch_norm_2d_layer_numbers"],
            checkpoint["dropout2d_layer_numbers"],
            checkpoint["dropout2d_probabilities"],
            checkpoint["linear_layer_out_features"],
            checkpoint["batch_norm_1d_layer_numbers"],
            checkpoint["conv_activation_functions"],
            checkpoint["fc_activation_functions"],
            checkpoint["dropout1d_layer_numbers"],
            checkpoint["dropout1d_probabilities"],
        )

        model.load_state_dict(checkpoint['state_dict'])
        print("\nLoaded: ")
        print(filename)
        return model
        
    def save_network_params(self, model, comment):
        """Save only ``model.state_dict()`` to a ``.pt`` file.

        The file is written to
        ``<self.path_params>/_r<run_count>_e<epoch_count>.pt``.

        Args:
            model: Object exposing ``state_dict()``.
            comment: Accepted but not used in the file name.

        Returns:
            ``self.subfolder``.
        """
        # Two blank lines, then dump the weights that are about to be saved.
        print()
        print()
        weights = model.state_dict()
        print(weights)

        target_name = "".join(
            ["_r", str(self.run_count), "_e", str(self.epoch_count), ".pt"]
        )
        torch.save(weights, os.path.join(self.path_params, target_name))
        return self.subfolder
        
    def load_network_params(self, date_, model, run, epoch):
        """Load a saved state dict into an existing model, in place.

        Args:
            date_: Name of the subfolder of ``self.params_root`` to read from.
            model: Already constructed model; its weights are overwritten
                and it is switched to evaluation mode.
            run: Run number of the file to load.
            epoch: Epoch number of the file to load.

        Returns:
            None. The passed-in ``model`` object itself is modified.

        Raises:
            FileNotFoundError: If no listed file matches ``(run, epoch)``.
        """
        print("Available subfolders:")
        print(np.transpose(next(os.walk(self.params_root))[1]))
        print()

        # Used as parallel sequences: run_epoch[i] is the (run, epoch) pair
        # belonging to filenames[i].
        run_epoch, filenames = self.retrieve_run_epoch_from_filenames(date_)

        print("Files in " + "[" + date_ + "]")
        print(filenames)

        # Find the index of the matching file name. Without this explicit
        # check a missing file would surface as an UnboundLocalError.
        matches = [
            row
            for row, sublist in enumerate(run_epoch)
            if sublist[0] == run and sublist[1] == epoch
        ]
        if not matches:
            raise FileNotFoundError(
                "No checkpoint for run={} epoch={} in [{}]".format(run, epoch, date_)
            )
        # If several entries match, the last one is used.
        filename = filenames[matches[-1]]

        folder = os.path.join(self.params_root, date_)
        model.load_state_dict(torch.load(os.path.join(folder, filename)))
        print("\nLoaded: ")
        print(filename)
        model.eval()

Then, I use them as follows:

# Driver script: build a reference model, save it through both code paths,
# and load it back as `network1` (weights-only path) and `network2`
# (full-checkpoint path).
network3 = Network3(input_depth, conv_layer_out_channels, 
                    conv_layer_kernel_sizes, 
                    conv_layer_strides, 
                    conv_layer_paddings, 
                    max_pool_layer_numbers, 
                    max_pool_kernel_sizes, 
                    max_pool_strides, 
                    max_pool_paddings, 
                    batch_norm_2d_layer_numbers,
                    dropout2d_layer_numbers,
                    dropout2d_probabilities, 
                    linear_layer_out_features,                    
                    batch_norm_1d_layer_numbers,
                    conv_layer_activations,
                    linear_layer_activations,
                    dropout1d_layer_numbers,
                    dropout1d_probabilities
                    )

network3.eval()

# Architecture settings handed to save_network() below; the keys are the
# same ones load_network() reads back out of the checkpoint.
network_parameters = {"conv_layer_out_channels": conv_layer_out_channels,
                      "conv_layer_kernel_sizes": conv_layer_kernel_sizes,
                      "conv_layer_strides": conv_layer_strides,
                      "conv_layer_paddings": conv_layer_paddings,
                      "max_pool_layer_numbers": max_pool_layer_numbers,
                      "max_pool_kernel_sizes": max_pool_kernel_sizes,
                      "max_pool_strides": max_pool_strides,
                      "max_pool_paddings": max_pool_paddings,
                      "batch_norm_2d_layer_numbers": batch_norm_2d_layer_numbers,
                      "dropout2d_layer_numbers": dropout2d_layer_numbers,
                      "dropout2d_probabilities": dropout2d_probabilities,
                      "linear_layer_out_features": linear_layer_out_features,
                      "batch_norm_1d_layer_numbers": batch_norm_1d_layer_numbers,
                      "dropout1d_layer_numbers": dropout1d_layer_numbers,
                      "dropout1d_probabilities": dropout1d_probabilities
                      }

mmm1 = RunManager('test', 'test')

dat = mmm1.save_network_params(network3, 'comment')
# This binds a second name to the SAME object; no copy is made, so
# `network1 is network3` holds and anything loaded into network1 is also
# loaded into network3.
network1 = network3
# NOTE(review): only `load_network_params` is shown above; `load_network_params_2`
# is not defined in the visible code — confirm it exists or that this is a typo.
mmm1.load_network_params_2( date_=dat, model=network1, run=0, epoch=0)

mmm2 = RunManager('test', 'test')

dat = mmm2.save_network(network3, network_parameters, input_depth, conv_layer_activations, linear_layer_activations, 'test_save')
# load_network() constructs a brand-new Network3 instance, so network2 is a
# different Python object from network3 / network1.
network2 = mmm2.load_network(dat, 0, 0)
network2.eval()

Then I print comparisons:

# NOTE(review): `network_loaded1` / `network_loaded2` are not defined in the
# code shown in this post; presumably they stand for `network1` / `network2`
# — confirm.
# NOTE(review): `==` between two model objects likely falls back to Python's
# default identity comparison rather than comparing weights — confirm. To
# check that two models hold the same values, compare their state_dict
# tensors entry by entry instead.
print(network3 == network_loaded2)
print(network1 == network_loaded2)
print(network3 == network_loaded1)

And the results are

False
False
True

Why do I have the first two False?
@ptrblck Would you have any ideas/suggestions? Could it be that the two networks are located at different memory addresses, and that this makes them compare as not equal?

ptrblck · August 31, 2021, 7:00pm

I assume you would like to compare the parameters and buffers of these models to make sure they're “equal”? If so, you would have to compare these tensors directly, as I think you are now comparing the ids of the Python objects instead.