Efficient sum of distributions (mixture distribution) and sampling

I managed to implement adaptive sampling by iteratively drawing samples from an updated distribution, represented as a mixture of two distributions, such that

ρ_new = a * ρ_old + (1 - a) * ρ_add

where ρ_old is a uniform distribution at the first iteration and ρ_add is always a normal distribution.
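For reference, unrolling this update shows that after k iterations the mixture is a weighted combination of the initial uniform and every normal added so far, with geometrically decaying weights. Here is a minimal sketch of that weight recursion (assuming a fixed mixing weight a across iterations, matching the constant asr in the code below; the values are illustrative):

# Unrolled update: rho_k = a**k * rho_uniform + sum_j (1 - a) * a**(k - j) * rho_add_j,  j = 1..k
a, k = 0.8, 5  # mixing weight and number of updates (illustrative values)
weights = [a**k] + [(1 - a) * a**(k - j) for j in range(1, k + 1)]
print(weights, sum(weights))  # the weights sum to 1 by construction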

This works well in the one-dimensional case; PyTorch code for illustration is as follows:

import torch
import torch.distributions as dist
import numpy as np

# Target device for the returned samples
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def inverse_cdf_sampling(num_samples, cdf, x):
  # Draw u ~ U(0, 1) and invert the discrete CDF via binary search;
  # the dtype must match cdf for torch.searchsorted
  uniform_samples = torch.rand(num_samples, dtype=cdf.dtype)
  indices = torch.searchsorted(cdf, uniform_samples, right=True)
  sampled_values = x[indices.clamp(max=x.size(0) - 1)]

  return sampled_values.to(device)

def CDF_update(lam, PDF, x):
  asr = 0.8
  mean, std = lam
  new_dist = dist.Normal(torch.tensor([mean], dtype=x.dtype),
                         torch.tensor([std], dtype=x.dtype))

  pdf_new = torch.exp(new_dist.log_prob(x))

  # Mixture update: rho_new = asr * rho_old + (1 - asr) * rho_add,
  # renormalized so the density integrates to 1 on the uniform grid
  weighted_sum = asr * PDF + (1 - asr) * pdf_new
  normalized_pdf = weighted_sum / torch.sum(weighted_sum) * x.size(0) / (x[-1] - x[0])

  cdf = torch.cumsum(normalized_pdf, dim=0)
  normalized_cdf = cdf / cdf[-1]

  return normalized_pdf, normalized_cdf

PDF = dist.Uniform(torch.tensor([0.1 + 1e-4], dtype=torch.float64),
                   torch.tensor([3.0], dtype=torch.float64))

x   = torch.linspace(0.1 + 1e-4, 3, 1000).double()
PDF = torch.exp(PDF.log_prob(x))
CDF = torch.cumsum(PDF, dim=0)

lambda_val = [0.0, 1.0]   # (mean, std) of the Gaussian component to mix in
PDF, CDF   = CDF_update(lambda_val, PDF, x)

samples = inverse_cdf_sampling(100, CDF, x)
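As a quick sanity check (an addition for illustration, not part of the scheme itself), the histogram of a larger batch of samples should approach the normalized mixture density:

# Compare the empirical histogram against the mixture density on the same range
check = inverse_cdf_sampling(100_000, CDF, x)
hist = torch.histc(check.cpu(), bins=50, min=x[0].item(), max=x[-1].item())
hist = hist / hist.sum() / ((x[-1] - x[0]).item() / 50)  # counts -> density
print(hist.max().item(), PDF.max().item())  # should be of comparable magnitude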

In this code, I normalize the mixture density so that the CDF stays within [0, 1], and sample by inverting the mixture CDF. However, this trick becomes extremely computationally expensive in the bivariate case, where every update has to rebuild the PDF and CDF over the full 2-D grid:

def bivariate_inverse_cdf_sampling(num_samples, cdf, x, y):
    # `cdf` is a 1-D CDF over the flattened grid cells (row-major), so one
    # searchsorted picks a cell; the flat index is then unravelled to (i, j)
    uniform_samples = torch.rand(num_samples, dtype=cdf.dtype)
    indices = torch.searchsorted(cdf, uniform_samples, right=True)
    indices = indices.clamp(max=cdf.size(0) - 1)

    rows = indices // y.size(0)
    cols = indices % y.size(0)

    sampled_values_x = x[rows]
    sampled_values_y = y[cols]

    return sampled_values_x.to(device), sampled_values_y.to(device)

def CDF_update_bivariate(mean, cov, PDF, x, y):
    asr = 0.9
    new_dist = dist.MultivariateNormal(torch.tensor(mean, dtype=x.dtype),
                                       torch.tensor(cov, dtype=x.dtype))

    X, Y = torch.meshgrid(x, y, indexing='ij')
    grid = torch.stack((X.flatten(), Y.flatten()), dim=1)

    pdf_new = torch.exp(new_dist.log_prob(grid)).reshape(x.size(0), y.size(0))
    weighted_sum = asr * PDF + (1 - asr) * pdf_new
    normalized_pdf = weighted_sum / torch.sum(weighted_sum) * x.size(0) * y.size(0) / ((x[-1] - x[0]) * (y[-1] - y[0]))

    # The row-major flattening of the joint CDF F(x, y) is not monotone, so it
    # cannot be fed to searchsorted. Instead, treat every grid cell as a
    # category and build a 1-D CDF over the flattened cell probabilities
    cell_prob = normalized_pdf.flatten()
    normalized_cdf = torch.cumsum(cell_prob / cell_prob.sum(), dim=0)

    return normalized_pdf, normalized_cdf

x = torch.linspace(0.1 + 1e-4, 3, 500).double()
sk_max = np.exp(3 * 0.01 / np.sqrt(1 - 0.8**2))
sk_min = np.exp(-3 * 0.01 / np.sqrt(1 - 0.8**2))
y = torch.linspace(sk_min, sk_max, 500).double()
X, Y = torch.meshgrid(x, y, indexing='ij')

# Uniform density over the rectangle, represented on the grid
area = (x[-1] - x[0]) * (y[-1] - y[0])
PDF  = torch.full_like(X, 1.0 / area.item())

# Matching 1-D CDF over the flattened grid cells (uniform -> equal cell mass)
CDF = torch.cumsum(PDF.flatten() / PDF.sum(), dim=0)

mean = [0.0, 0.0]
cov  = [[1.0, 0.0], [0.0, 1.0]]
PDF, CDF = CDF_update_bivariate(mean, cov, PDF, x, y)

sp1, sp2 = bivariate_inverse_cdf_sampling(100, CDF, x, y)
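To make the cost concrete, timing a single update already shows the per-iteration price of rebuilding everything on the 500 x 500 grid (a rough measurement sketch; absolute numbers depend on hardware):

import time

t0 = time.perf_counter()
PDF, CDF = CDF_update_bivariate(mean, cov, PDF, x, y)
print(f"one update on a 500 x 500 grid took {time.perf_counter() - t0:.4f} s")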

So I wonder: is there any way to make the distribution update and sampling efficient and easy to compute?