Different model output for same inputs with basic model

JoachimB · May 2, 2024, 11:25am

Hello,

I am new to PyTorch and I am having problems fitting a model for an assignment. Somehow I cannot get the same outputs from the model, whether it is in .eval or .train mode. I have the feeling I am missing something, but I can’t figure out whether it is related to the gradients or to something else. I have made a minimal reproducible example below. Any help would be greatly appreciated. Thank you very much.


from torch.utils.data import DataLoader, TensorDataset
import torch
from torchvision import transforms
import torch.nn as nn
import random
import torch.optim as optim
from torch.utils.data import random_split
from sklearn.metrics import accuracy_score
from torchvision.models import *
from sklearn.gaussian_process.kernels import *

# Environment this example was written against:
#   pip install torch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1

# One shared seed for the stdlib and the PyTorch random number generators.
RANDOM_SEED = 1111
random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Number of CPU threads PyTorch may use.
num_threads = 16
torch.set_num_threads(num_threads)

# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Image pre-processing pipeline: resize, centre-crop to a square of side
# `img_formatting`, convert to a tensor and normalise each channel.
img_formatting = 224
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(img_formatting),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

def create_loader_from_np(X, y=None, train=True, batch_size=128, shuffle=False, num_workers=24, validation_split=0.8):
    """
    Wrap numpy arrays in torch.utils.data.DataLoader objects.

    input: X: numpy array, the features (converted to float tensors)
           y: numpy array, the labels (converted to long tensors); only
              read when train is True
           train: True builds labelled loader(s), False builds a single
                  feature-only loader
           batch_size, shuffle, num_workers: forwarded unchanged to every
                  DataLoader that is created (pin_memory is always True)
           validation_split: fraction of the samples that goes into the
                  *training* subset; the remainder becomes the validation
                  subset. A value that is not > 0.0 disables the split.

    output: train=False: loader
            train=True:  (train_loader, val_loader); val_loader is None
                         when no split is performed
    """
    def build_loader(data):
        # Every loader in this function shares exactly the same settings.
        return DataLoader(
            dataset=data,
            batch_size=batch_size,
            shuffle=shuffle,
            pin_memory=True,
            num_workers=num_workers,
        )

    if not train:
        features_only = TensorDataset(torch.from_numpy(X).type(torch.float))
        return build_loader(features_only)

    # Attention: If you get type errors you can modify the type of the
    # labels here
    features = torch.from_numpy(X).type(torch.float)
    targets = torch.from_numpy(y).type(torch.long)
    labelled = TensorDataset(features, targets)

    n_total = len(labelled)
    n_train = int(validation_split * n_total)
    n_val = n_total - n_train

    if not validation_split > 0.0:
        # No split requested: the full dataset is the training set.
        return build_loader(labelled), None

    train_part, val_part = random_split(labelled, [n_train, n_val])
    return build_loader(train_part), build_loader(val_part)

def encoder_choice(encoder_model_name):
    """
    Build a pretrained torchvision EfficientNet with its final classification
    layer replaced by an identity, so that the model returns embeddings.

    input: encoder_model_name: str, one of "efficient_net_b5",
           "efficient_net_b7" or "efficient_net_v2"

    output: encoder_model: the model with ImageNet (IMAGENET1K_V1) weights
                           and `classifier[-1]` set to torch.nn.Identity()
            input_size: int, `in_features` of the removed final layer, i.e.
                        the size of the embedding the encoder now returns
            batch_size: int, batch size to use with this encoder (8 for
                        every supported model)

    raises: ValueError if encoder_model_name is not one of the names above
    """
    if encoder_model_name == "efficient_net_b5":
        encoder_model = efficientnet_b5(weights=EfficientNet_B5_Weights.IMAGENET1K_V1)
    elif encoder_model_name == "efficient_net_b7":
        encoder_model = efficientnet_b7(weights=EfficientNet_B7_Weights.IMAGENET1K_V1)
    elif encoder_model_name == "efficient_net_v2":
        encoder_model = efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.IMAGENET1K_V1)
    else:
        raise ValueError(f"Unknown encoder model name: {encoder_model_name!r}")

    # The head is stripped the same way for every variant: the last entry of
    # `classifier` is the layer whose `in_features` is read here. Assigning to
    # a non-existent `fc` attribute would only attach an unused module and
    # leave the ImageNet classifier in place.
    input_size = encoder_model.classifier[-1].in_features
    encoder_model.classifier[-1] = torch.nn.Identity()
    batch_size = 8

    return encoder_model, input_size, batch_size


class FeedForwardClassifier(nn.Module):
    """
    Two stacked linear layers followed by a sigmoid.

    input_size -> hidden_size -> output_size, with the sigmoid applied to the
    output of the second layer. `dropout_prob` is accepted by the constructor
    but not used, and the `relu` module is created but never applied in
    `forward`, so there is no non-linearity between the two linear layers.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout_prob):
        super().__init__()
        # Sub-modules are created in this exact order so that parameter
        # initialisation consumes the random number generator identically.
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc3 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return sigmoid(fc3(fc1(x)))."""
        hidden = self.fc1(x)
        scores = self.fc3(hidden)
        return self.sigmoid(scores)
def train_model(
        train_loader,
        val_loader,
        embeddings_model,
        embeddings_shape,
        hidden_size=64,
        output_size=1,
        L2=1e-4,
        learning_rate=1e-5,
        dropout_prob=0.2,
        n_epochs=20
):
    """
    Train a FeedForwardClassifier with binary cross-entropy and Adam.

    input: train_loader: iterable of (inputs, label) batches
           val_loader: accepted for interface compatibility, not used
           embeddings_model: accepted for interface compatibility, not used;
                             no pretrained encoder is built here because the
                             classifier only consumes the loader's features
           embeddings_shape: int, the classifier input size is
                             3 * embeddings_shape
           hidden_size, output_size: sizes of the classifier layers
           L2: weight_decay passed to Adam
           learning_rate: learning rate passed to Adam
           dropout_prob: forwarded to FeedForwardClassifier
           n_epochs: number of passes over train_loader

    output: model: the trained classifier (left in train mode, on `device`)
            predictions: numpy array of the model outputs recorded during the
                         LAST epoch, stacked along the first axis. Each batch
                         is recorded before that batch's optimizer step, so
                         these values come from a model whose weights were
                         still changing and will generally differ from what
                         the returned model produces on the same data. The
                         array is empty, with shape (0, output_size), when no
                         batch was processed.
    """
    # Imported locally so the function does not depend on `np` leaking into
    # the module namespace through a star import.
    import numpy as np

    L1 = 1e-3
    # The L1 penalty is switched off by a factor of 0; keeping the factor in
    # one place lets the loop skip the per-step norm computation entirely.
    l1_scale = L1 * 0
    # presumably three embeddings are concatenated per sample (triplets) —
    # TODO confirm against the code that builds the features
    pretrained_model_output_size = 3 * embeddings_shape
    criterion = nn.BCELoss()

    model = FeedForwardClassifier(pretrained_model_output_size, hidden_size, output_size, dropout_prob)
    model = model.to(device)  # Move the model to the desired device before creating the optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=L2)
    model.train()

    # Bound up front so that n_epochs == 0 does not leave the name undefined.
    predictions = []
    for _epoch in range(n_epochs):
        epoch_loss = 0.0
        predictions = []  # only the last epoch's outputs are returned
        for inputs, label in train_loader:
            optimizer.zero_grad()

            # Forward pass
            inputs = inputs.to(device)
            label = label.to(device).float().view(-1, 1)
            outputs = model(inputs)
            predictions.append(outputs.detach().cpu().numpy())

            # Compute the loss
            loss = criterion(outputs.float().view(-1, 1), label)
            # .item() stores a plain float; accumulating the tensor itself
            # would keep every batch's autograd graph alive.
            epoch_loss += loss.item()

            # L1 regularization (only when enabled)
            if l1_scale:
                l1_reg = sum(torch.norm(param, 1) for param in model.parameters())
                loss = loss + l1_scale * l1_reg

            loss.backward()
            optimizer.step()

    if not predictions:
        # Nothing was processed (no epochs or an empty loader).
        return model, np.empty((0, output_size), dtype=np.float32)

    return model, np.vstack(predictions)


# Main function. You don't have to change this
if __name__ == '__main__':
    # Explicit import: the script must not rely on `np` being re-exported by
    # one of the star imports at the top of the file.
    import numpy as np

    TRAIN_TRIPLETS = 'train_triplets.txt'
    TEST_TRIPLETS = 'test_triplets.txt'

    pca_variance_threshold = 1.0
    train_epochs = 1
    encoder_model_name = "efficient_net_b5"

    # Synthetic data: Gaussian features and binary labels.
    # randint's upper bound is exclusive, so (0, 2) is required to draw from
    # {0, 1}; (0, 1) can only ever produce 0.
    X = np.random.normal(size=(8000, 6141))
    y = np.random.randint(0, 2, size=(8000, 1))
    train_loader, val_loader = create_loader_from_np(
        X,
        y,
        train=True,
        batch_size=64,
        validation_split=0.0
    )

    model, predictions = train_model(train_loader, val_loader, encoder_model_name, 2047,
                                     n_epochs=train_epochs)

    # Score the same data again with the trained model. Inference only:
    # evaluation mode and no autograd bookkeeping.
    model.eval()
    predictions_ = []
    with torch.no_grad():
        for x_batch, _labels in train_loader:
            x_batch = x_batch.to(device)
            predictions_.append(model(x_batch).cpu().numpy())
    predictions_ = np.vstack(predictions_)

    # Compare the two arrays element-wise. `a - b == 0.0` would be parsed as
    # `a - (b == 0.0)` and would not compare anything.
    # NOTE: `predictions` were recorded batch by batch during training, before
    # each optimizer step, so they can legitimately differ from the outputs of
    # the final model computed above.
    max_abs_diff = float(np.max(np.abs(predictions_ - predictions)))
    assert np.allclose(predictions_, predictions), (
        f"outputs recorded during training differ from the trained model's "
        f"outputs (max abs difference: {max_abs_diff:.3e})"
    )

ptrblck · May 2, 2024, 4:50pm

I assume you are expecting to see bitwise identical results between different runs?
If so, you would need to enable deterministic algorithms as described in the Reproducibility notes of the PyTorch documentation (see `torch.use_deterministic_algorithms`).