Hi! I want to use an ICNN (input convex neural network) as an integral function F(x),
and have the derivative f(x) = dF/dx of the ICNN approximate a line (y = ax + b).
My intuition is: in my research I want to approximate a monotone function, and the derivative of a convex function must be monotonically non-decreasing, so fitting the derivative of an ICNN may make it possible to reach a global minimum.
For the implementation, I used autograd to get the gradient of the output with respect to the input:
integral_x1=net(x0)
net.zero_grad()
integral_x1[:,0].backward(torch.ones(batch_size).to(device),retain_graph=True)
x1=x0.grad
I then take the MSE loss between the labels and x1, and do an Adam update.
But this ICNN can't approximate the integral of an affine function.
Has anyone tried combining the gradient of a neural network into the loss and training with Adam or SGD?
# Last modified: Feb 25 2020 by Jiaojiao Fan
# What is this file used for?
# I am testing: using the network's input gradient inside the loss function,
# to see whether it can approximate a line from dataInitial_line.npy
from network_initial import fNet
import torch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
# --- Data -------------------------------------------------------------------
# Each row of the array is one sample; the training loop reads column 0 as the
# network input and column 1 as the regression target.
batch_size = 1000
trainset = torch.from_numpy(np.load('./data/dataInitial_line.npy')).float()
trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

# --- Model, loss and optimizer ----------------------------------------------
net = fNet()
net.to(device)
ws_loss = nn.MSELoss()
# Alternative optimizer that was tried:
# optimizer = optim.SGD(net.parameters(), lr=0.00001, momentum=.999)
optimizer = optim.Adam(net.parameters(), lr=0.0001)
# Train the network so that the derivative of its output with respect to its
# input matches the labels (MSE), printing the mean loss every 50 batches.
for epoch in range(20):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # Column 0 is the input x, column 1 is the target value for dF/dx.
        # The input must track gradients so that dF/dx can be taken below.
        x0 = data[:, 0].to(device).requires_grad_(True)
        labels = data[:, 1].to(device)

        integral_x1 = net(x0)
        potential = integral_x1[:, 0]

        # Differentiate the network output w.r.t. its input.
        # create_graph=True keeps x1 attached to the autograd graph, so the
        # loss below can back-propagate into the network parameters. Reading
        # x0.grad after a plain .backward() (and re-wrapping it as a new leaf)
        # yields a tensor with no history, so the optimizer would never
        # receive a gradient for the parameters.
        # ones_like sizes grad_outputs to the actual batch, which also covers
        # a final batch shorter than batch_size.
        x1, = torch.autograd.grad(
            outputs=potential,
            inputs=x0,
            grad_outputs=torch.ones_like(potential),
            create_graph=True,
        )

        loss = ws_loss(x1, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Project the fc1/fc2 weights back onto the non-negative orthant after
        # each step (negative entries are set to zero).
        with torch.no_grad():
            net.fc1.weight.clamp_(min=0)
            net.fc2.weight.clamp_(min=0)

        running_loss += loss.item()
        if i % 50 == 49:
            print('[%2d %5d] loss:%.3f' %
                  (epoch + 1, i + 1, running_loss / 50))
            running_loss = 0.0