Different model output for same inputs with basic model

Hello,

I am new to Pytorch and I have problems with the fitting of a model for an assignment. Somehow I do not manage to get the same outputs from the model, weather in .eval or .train mode. I have the feeling I am missing something, but I can’t figure out if its related to the gradients or whatever. I have made a minimum reproducible example hereunder. Any help would be very appreciated. Thank you very much :slight_smile:


from torch.utils.data import DataLoader, TensorDataset
import torch
from torchvision import transforms
import torch.nn as nn
import random
import torch.optim as optim
from torch.utils.data import random_split
from sklearn.metrics import accuracy_score
from torchvision.models import *
from sklearn.gaussian_process.kernels import *

# pip install torch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1
RANDOM_SEED = 1111
torch.manual_seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

num_threads = 16
img_formatting = 224
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(img_formatting),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
torch.set_num_threads(num_threads)

def create_loader_from_np(X, y=None, train=True, batch_size=128, shuffle=False, num_workers=24, validation_split=0.8):
    """
    Create a torch.utils.data.DataLoader object from numpy arrays containing the data.

    input: X: numpy array, the features
           y: numpy array, the labels

    output: loader: torch.data.util.DataLoader, the object containing the data
    """
    if train:
        # Attention: If you get type errors you can modify the type of the
        # labels here
        dataset = TensorDataset(
            torch.from_numpy(X).type(torch.float),
                    torch.from_numpy(y).type(torch.long)
        )

        dataset_size = len(dataset)
        train_size = int(validation_split * dataset_size)
        val_size = dataset_size - train_size
        if validation_split > 0.0:
            train_set, val_set = random_split(dataset, [train_size, val_size])
            train_loader = DataLoader(dataset=train_set,
                                batch_size=batch_size,
                                shuffle=shuffle,
                                pin_memory=True,
                                num_workers=num_workers
                                )

            val_loader = DataLoader(dataset=val_set,
                                batch_size=batch_size,
                                shuffle=shuffle,
                                pin_memory=True,
                                num_workers=num_workers
                                )

            return train_loader, val_loader
        else:
            train_loader = DataLoader(dataset=dataset,
                                      batch_size=batch_size,
                                      shuffle=shuffle,
                                      pin_memory=True,
                                      num_workers=num_workers
                                    )

            return train_loader, None

    else:
        dataset = TensorDataset(torch.from_numpy(X).type(torch.float))
        loader = DataLoader(dataset=dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            pin_memory=True, num_workers=num_workers)
        return loader

def encoder_choice(encoder_model_name):
    if encoder_model_name == "efficient_net_b5":
        weights = EfficientNet_B5_Weights.IMAGENET1K_V1
        encoder_model = efficientnet_b5(weights=weights)
        input_size = encoder_model.classifier[-1].in_features
        encoder_model.fc = torch.nn.Identity()
        batch_size = 8
    elif encoder_model_name == "efficient_net_b7":
        weights = EfficientNet_B7_Weights.IMAGENET1K_V1
        encoder_model = efficientnet_b7(weights=weights)
        input_size = encoder_model.classifier[-1].in_features
        encoder_model.classifier[-1] = torch.nn.Identity()
        batch_size = 8
    elif encoder_model_name == "efficient_net_v2":
        weights = EfficientNet_V2_S_Weights.IMAGENET1K_V1
        encoder_model = efficientnet_v2_s(weights=weights)
        input_size = encoder_model.classifier[-1].in_features
        encoder_model.classifier[-1] = torch.nn.Identity()
        batch_size = 8

    else:
        raise (ValueError)

    return encoder_model, input_size, batch_size


class FeedForwardClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout_prob):
        super(FeedForwardClassifier, self).__init__()
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out = self.fc1(x)
        out = self.fc3(out)
        out = self.sigmoid(out)

        return out
def train_model(
        train_loader,
        val_loader,
        embeddings_model,
        embeddings_shape,
        hidden_size=64,
        output_size=1,
        L2=1e-4,
        learning_rate=1e-5,
        dropout_prob=0.2,
        n_epochs=20
):

    L1 = 1e-3
    _, input_size, _ = encoder_choice(embeddings_model)
    pretrained_model_output_size = 3 * embeddings_shape  # img_formatting ** 2
    criterion = nn.BCELoss()

    model = FeedForwardClassifier(pretrained_model_output_size, hidden_size, output_size, dropout_prob)
    model = model.to(device)  # Move the model to the desired device before creating the optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=L2)
    model.train()

    for epoch in range(n_epochs):
        epoch_loss = 0.0
        ground_truth = []
        predictions = []
        for [inputs, label] in train_loader:
            optimizer.zero_grad()

            # Forward pass
            inputs = inputs.to(device)
            label = label.to(device)
            label = label.float().view(-1, 1)
            outputs = model(inputs)

            predicted = outputs.cpu().detach().numpy()
            true_labels = label.cpu().detach().numpy()

            # Compute the loss
            outputs = outputs.float().view(-1, 1)
            loss = criterion(outputs, label)
            epoch_loss += loss
            predictions.append(predicted)
            ground_truth.append(true_labels)

            # L1 regularization
            l1_reg = torch.tensor(0., requires_grad=True)
            for param in model.parameters():
                l1_reg = l1_reg + torch.norm(param, 1)

            loss = loss + L1 * l1_reg * 0

            loss.backward()
            optimizer.step()

    predictions = np.vstack(predictions)

    return model, predictions


# Main function. You don't have to change this
if __name__ == '__main__':
    TRAIN_TRIPLETS = 'train_triplets.txt'
    TEST_TRIPLETS = 'test_triplets.txt'

    pca_variance_threshold = 1.0
    train_epochs = 1
    encoder_model_name = "efficient_net_b5"
    X, y = np.random.normal(size=(8000, 6141)), np.random.randint(0, 1, size=(
    8000, 1))
    train_loader, val_loader = create_loader_from_np(
        X,
        y,
        train=True,
        batch_size=64,
        validation_split=0.0
    )

    model, predictions = train_model(train_loader, val_loader, encoder_model_name, 2047,
                                     n_epochs=train_epochs)

    predictions_ = []
    true_labels = np.array([])
    for [x_batch, labels] in train_loader:
        x_batch = x_batch.to(device)
        predicted = model(x_batch)
        predicted = predicted.detach().cpu().numpy()
        predictions_.append(predicted.copy())
    predictions_ = np.vstack(predictions_)

    assert np.sum(predictions_ - predictions == 0.0)

I assume you are expecting to see bitwise identical results between different runs?
If so, you would need to enable deterministic algorithms as described here.