I have some inputs that I want to pass through my model multiple times. The model is stochastic, i.e. the outputs differ on each pass. Something like this:
# multiple forward passes
x = [...]
outputs = []
for i in range(num_passes):
    input = x  # TODO clone? detach?
    outputs.append(model(input))

# backpropagation through outputs
(...)
I want the gradient graph for each pass to run from the input to the corresponding output. Do I have to call clone() or detach() on the inputs to make sure the gradients flow back properly?
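To make the question concrete, here is a stripped-down version of the pattern (a single Linear layer stands in for my model; the layer and shapes are just placeholders for illustration):

import torch

lin = torch.nn.Linear(1, 1)
x = torch.randn(4, 1)

# the same x is reused in every pass, with no clone() or detach()
outputs = [lin(x) for _ in range(3)]
loss = sum(o.sum() for o in outputs)
loss.backward()  # should this accumulate gradients from all three passes?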
Here’s an example script that can be run:
Script
import numpy as np
import torch
device = "cuda"
num_passes = 5
# generate dataset
X = torch.Tensor(np.linspace(-10, 10, 100)[..., None]).to(device)
Y = torch.sin(X).to(device)
class MLP(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = torch.nn.Sequential(
            torch.nn.Linear(1, 64),
            torch.nn.LeakyReLU(),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(64, 64),
            torch.nn.LeakyReLU(),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(64, 1)).to(device)

    def forward(self, x):
        self.train()  # keep dropout active so every pass is stochastic
        predictions = torch.zeros((num_passes, *x.shape), device=device)
        for i in range(num_passes):
            input = x  # TODO clone? detach?
            predictions[i] = self.model(input)
        return predictions
# repeat desired output to match shape of predictions
# (num_passes, ...)
Y_repeated = Y.unsqueeze(0).repeat_interleave(num_passes, 0)
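# (illustrative sanity check, not required) predictions from forward() will have
# shape (num_passes, 100, 1), and Y_repeated now matches that shape
assert Y_repeated.shape == (num_passes, *X.shape)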
model = MLP()
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
for epoch in range(100):
    optimizer.zero_grad()
    preds = model(X)
    loss = loss_fn(preds, Y_repeated)
    loss.backward()
    optimizer.step()
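Purely for illustration, since dropout stays on in forward(), the multiple passes could later be summarized like this (the mean/std readout is my own assumption of how the passes get used, not part of the actual pipeline):

with torch.no_grad():
    preds = model(X)               # shape (num_passes, 100, 1), still stochastic
    mean_pred = preds.mean(dim=0)  # average prediction per input point
    std_pred = preds.std(dim=0)    # spread across passes, e.g. as an uncertainty proxy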