Hi,
I tried something a bit unusual. It is working, but sadly there is a memory leak. Maybe someone can help me find it. Basically I tried to create a gradient-based integration function: fit a model to the gradient values of a function. Here I am fitting both the y values and the gradients in parallel.
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch import FloatTensor as FT


def parabel(a, b, c, x):
    return a*x**2 + b*x + c


class ParabelModel(nn.Module):
    def __init__(self, start_params=[1, 1, 0]):
        super().__init__()
        self.params = nn.Parameter(FT(start_params))

    def forward(self, x):
        a, b, c = self.params
        return parabel(a, b, c, x)


def print_usage(device):
    if device.type == 'cuda':
        actual_mem = torch.cuda.memory_allocated()
        max_mem = torch.cuda.max_memory_allocated()
        print(f"{actual_mem/max_mem:0.2f} actual: {actual_mem:.2e} max: {max_mem:.2e}")
    return


a, b, c = -2, 3, 2
N = 100
device = torch.device('cuda')

# create data: target y values and target gradients dy/dx
xs_train = torch.linspace(-1, 1, N, requires_grad=True, device=device)
ys_train = parabel(a, b, c, xs_train)
ys_train.backward(torch.ones_like(ys_train))
dx_train = xs_train.grad
xs_train = xs_train.detach()
ys_train = ys_train.detach()

l1 = 1     # weight of the gradient loss
l2 = 0.00  # weight of the value loss
lr = 0.5

model = ParabelModel([2, 1, 0]).to(device)
optimizer = optim.AdamW(model.parameters(), lr=lr)

for epoch in range(400):
    xs = xs_train.clone().detach().requires_grad_(True)
    ys_pred = model(xs)

    # calculate gradients of the model wrt the inputs
    ys_pred.backward(torch.ones_like(ys_pred), create_graph=True)
    dx_pred = xs.grad

    # fit the gradients of the prediction to the target gradients
    loss_grad = l1 * F.mse_loss(dx_pred, dx_train)
    model.zero_grad()
    loss_grad.backward()

    # fit the predicted values to the target values
    loss = l2 * F.mse_loss(ys_pred, ys_train)
    loss.backward()

    with torch.no_grad():
        optimizer.step()
        optimizer.zero_grad()

    if not epoch % 50:
        print_usage(device)
        print(model.params.data)
This gives:
0.63 actual: 6.14e+03 max: 9.73e+03
tensor([1.4900, 1.4950, 0.0000], device='cuda:0')
0.41 actual: 5.73e+04 max: 1.39e+05
tensor([-2.1686, 3.0714, 0.0000], device='cuda:0')
0.57 actual: 1.09e+05 max: 1.90e+05
tensor([-1.9972, 2.9907, 0.0000], device='cuda:0')
0.66 actual: 1.60e+05 max: 2.41e+05
tensor([-1.9885, 2.9921, 0.0000], device='cuda:0')
0.72 actual: 2.11e+05 max: 2.92e+05
tensor([-1.9894, 2.9928, 0.0000], device='cuda:0')
0.76 actual: 2.62e+05 max: 3.44e+05
tensor([-1.9906, 2.9937, 0.0000], device='cuda:0')
0.79 actual: 3.13e+05 max: 3.95e+05
tensor([-1.9915, 2.9943, 0.0000], device='cuda:0')
0.82 actual: 3.65e+05 max: 4.46e+05
tensor([-1.9922, 2.9947, 0.0000], device='cuda:0')
It's converging to the expected values of a, b and c, but the memory keeps growing every iteration.
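In case it helps to narrow things down: the input gradients could presumably also be obtained with torch.autograd.grad instead of calling backward with create_graph=True, so that xs.grad is never populated. I have not verified whether this changes the memory behaviour; the following is just a sketch of the equivalent computation, reusing model, xs_train, dx_train and l1 from the script above:

# sketch: same gradient-matching loss via torch.autograd.grad (untested in the loop above)
xs = xs_train.clone().detach().requires_grad_(True)
ys_pred = model(xs)
# gradient of the prediction wrt the inputs; create_graph=True keeps the graph
# so that loss_grad.backward() can still reach the model parameters
dx_pred, = torch.autograd.grad(ys_pred, xs,
                               grad_outputs=torch.ones_like(ys_pred),
                               create_graph=True)
loss_grad = l1 * F.mse_loss(dx_pred, dx_train)
loss_grad.backward()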
Thanks