RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation (CVAE)

I am not sure where the modification is done in place. Here is my code. Can someone help me identify the error?

class CVAE(nn.Module):
    """1-D convolutional variational autoencoder.

    The encoder downsamples the sequence axis with four stride-2 convolutions
    and the decoder upsamples it with four stride-2 transposed convolutions.
    The dense layers are hard-coded to ``64 * 64`` features (64 channels x 64
    time steps), so the input is expected to be ``(batch, ydim, 1024)``.

    Args:
        ydim: Number of input (and reconstructed) channels.
        latent_dim: Size of the latent vector ``z``.
        **kwargs: Accepted for call-site compatibility; ignored.
    """

    def __init__(self, ydim, latent_dim, **kwargs):
        super().__init__()

        self.ydim = ydim
        self.latent_dim = latent_dim

        # Flattened encoder output size: 64 channels x 64 time steps.
        flat_dim = 64 * 64

        # Encoder. In-place ReLU is safe here: each activation overwrites a
        # conv output that has exactly one consumer and that the conv's own
        # backward pass does not need.
        self.enc = nn.Sequential(
            nn.Conv1d(ydim, 32, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(32, 64, 5, stride=2, padding=2),
            nn.ReLU(inplace=True),
            nn.Conv1d(64, 64, 5, stride=2, padding=2),
            nn.ReLU(inplace=True),
            nn.Conv1d(64, 64, 5, stride=2, padding=2),
            nn.ReLU(inplace=True),
            nn.Conv1d(64, 64, 5, stride=2, padding=2),
        )

        # Dense heads mapping to the latent space.
        #
        # Both heads receive the SAME flattened encoder tensor in forward(),
        # so their ReLUs must NOT be in-place: the second in-place ReLU would
        # overwrite the tensor the first head saved for its backward pass and
        # autograd would raise "one of the variables needed for gradient
        # computation has been modified by an inplace operation".
        # The ReLU stays at index 0 so state-dict keys are unchanged.
        self.dense_in_mu = nn.Sequential(
            nn.ReLU(),
            nn.Linear(flat_dim, latent_dim),
        )

        self.dense_in_std = nn.Sequential(
            nn.ReLU(),
            nn.Linear(flat_dim, latent_dim),
        )

        # Dense layer mapping from the latent space back to encoder features.
        self.dense_out = nn.Sequential(
            nn.Linear(latent_dim, flat_dim),
            nn.ReLU(inplace=True),
        )

        # Decoder: each transposed convolution doubles the sequence length.
        self.dec = nn.Sequential(
            nn.ConvTranspose1d(64, 64, 5, stride=2, padding=2, output_padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose1d(64, 64, 5, stride=2, padding=2, output_padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose1d(64, 64, 5, stride=2, padding=2, output_padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose1d(64, 32, 5, stride=2, padding=2, output_padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(32, ydim, 3, padding=1),
        )

        # Critic (reconstruction loss)
        self.crit = nn.MSELoss()

    def reparameterize(self, mu, logvar):
        """Sample ``z = mu + eps * std`` with ``eps ~ N(0, I)``.

        Args:
            mu: Latent means.
            logvar: Latent log-variances, same shape as ``mu``.

        Returns:
            A sample with the same shape as ``mu`` that stays differentiable
            with respect to ``mu`` and ``logvar``.
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def objective(self, obshat, true_observations, mu, logvar):
        """Return reconstruction loss plus KL divergence to a unit Gaussian.

        Note that the MSE term is averaged over elements (``nn.MSELoss``
        default) while the KL term is summed over every batch and latent
        element, so the two terms are on different scales.

        Args:
            obshat: Reconstructed observations.
            true_observations: Target observations, same shape as ``obshat``.
            mu: Latent means.
            logvar: Latent log-variances.

        Returns:
            Scalar loss tensor.
        """
        reconstruction_loss = self.crit(obshat, true_observations)
        kl_divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return reconstruction_loss + kl_divergence

    def forward(self, x, t, return_latent=True):
        """Encode ``x``, sample a latent vector, and decode it.

        Args:
            x: Input tensor of shape ``(batch, ydim, length)``.
            t: Unused; kept so existing callers keep working.
            return_latent: If true, also return the latent mean and
                log-variance.

        Returns:
            ``(reconstruction, z_mu, z_logvar)`` when ``return_latent`` is
            true, otherwise just the reconstruction.
        """
        # Encoding
        h = self.enc(x)
        enc_shape = h.shape  # remembered so the decoder input can be un-flattened
        h = h.view(len(h), -1)
        # Two separate heads read the same features; neither may modify them.
        z_mu = self.dense_in_mu(h)
        z_logvar = self.dense_in_std(h)
        z = self.reparameterize(z_mu, z_logvar)

        # Decoding
        h = self.dense_out(z)
        h = h.view(*enc_shape)
        x_hat = self.dec(h)

        if return_latent:
            return x_hat, z_mu, z_logvar

        return x_hat

This is the traceback:

    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation:
    [torch.cuda.DoubleTensor [256, 4096]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead.
    Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

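This resolved the issue (`model` is the PyTorch model). The error arises when a tensor that autograd saved for the backward pass is later overwritten by an in-place operation such as `nn.ReLU(inplace=True)`; switching those modules to out-of-place execution avoids it: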

    # Visit every submodule of the model and switch off in-place execution on
    # each one that exposes an `inplace` flag (e.g. activation layers),
    # logging the module before the flag is flipped.
    inplace_layers = (m for m in model.modules() if hasattr(m, "inplace"))
    for layer in inplace_layers:
        print(layer)
        print(f"Setting inplace=False for module: {layer}")
        layer.inplace = False