Hey guys I have tried to train a very simple 1D normalizing flow model with differential privacy by adapting the code from link. The code is very simple.
This is the model:
class Flow1d(nn.Module):
def __init__(self, n_components):
super(Flow1d, self).__init__()
self.mus = nn.Parameter(torch.randn(n_components), requires_grad=True)
self.log_sigmas = nn.Parameter(torch.zeros(n_components), requires_grad=True)
self.weight_logits = nn.Parameter(torch.ones(n_components), requires_grad=True)
def forward(self, x):
x = x.view(-1,1)
weights = self.weight_logits.softmax(dim=0).view(1,-1)
distribution = Normal(self.mus, self.log_sigmas.exp())
z = (distribution.cdf(x) * weights).sum(dim=1)
dz_by_dx = (distribution.log_prob(x).exp() * weights).sum(dim=1)
return z, dz_by_dx
This is the data:
def generate_mixture_of_gaussians(num_of_points):
n = num_of_points // 2
gaussian1 = np.random.normal(loc=-1, scale=0.25, size=(n,))
gaussian1 = torch.from_numpy(gaussian1)
gaussian1 = gaussian1.unsqueeze(1)
gaussian2 = np.random.normal(loc=0.5, scale=0.5, size=(num_of_points-n,))
gaussian2 = torch.from_numpy(gaussian2)
gaussian2 = gaussian2.unsqueeze(1)
return torch.cat((gaussian1, gaussian2), dim=0).to(torch.float32)
class NumpyDataset(data.Dataset):
def __init__(self, array):
super().__init__()
self.array = array
def __len__(self):
return len(self.array)
def __getitem__(self, index):
return self.array[index]
n_train, n_test = 2000, 1000
train_data = generate_mixture_of_gaussians(n_train)
test_data = generate_mixture_of_gaussians(n_test)
train_loader = data.DataLoader(NumpyDataset(train_data), batch_size=128, shuffle=True)
test_loader = data.DataLoader(NumpyDataset(test_data), batch_size=128, shuffle=True)
These are the train and loss functions:
def loss_function(target_distribution, z, dz_by_dx):
log_likelihood = target_distribution.log_prob(z) + dz_by_dx.log()
return -log_likelihood.mean()
def train(model, train_loader, optimizer, target_distribution):
model.train()
for x in train_loader:
z, dz_by_dx = model(x)
loss = loss_function(target_distribution, z, dz_by_dx)
optimizer.zero_grad()
loss.backward()
optimizer.step()
And executing just this code:
target_distribution = Uniform(0.0, 1.0)
model = Flow1d(n_components=5)
optimizer = torch.optim.Adam(model.parameters(), 5e-3)
# optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-1)
epochs = 100
delta = 1/2000.
privacy_engine = PrivacyEngine()
model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
module=model,
optimizer=optimizer,
data_loader=train_loader,
epochs=epochs,
target_epsilon=0.1,
target_delta=delta,
max_grad_norm=1.2,
)
gives the error:
C:\Users\MFRI\anaconda3\envs\deep_learning\Lib\site-packages\opacus\accountants\analysis\prv\prvs.py:50: RuntimeWarning: invalid value encountered in log
z = np.log((np.exp(t) + q - 1) / q)
Trying to execute the training after initializing the privacy engine gives the error:
"C:\Users\MFRI\anaconda3\envs\deep_learning\Lib\site-packages\opacus\optimizers\optimizer.py", line 282, in _get_flat_grad_sample
raise ValueError(
ValueError: Per sample gradient is not initialized. Not updated in backward pass?
I suspect that this has to do with the invalid value error above. I couldn’t trace back why I sometimes get the negative “t” values in the runtimewarning error which causes the “z” value to be “nan”. I would greatly appreciate any help!