My work consists of training several neural network architectures of varying width (number of neurons), performing the NTK linear approximation for each architecture, and saving the final train and test losses.
Here is the main code:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import torch
import torch.nn as nn
from tqdm import tqdm
from functorch import make_functional_with_buffers, vmap, vjp, jvp, jacrev

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for k in tqdm(range(1, 1300)):
    # print("********** TRAINING WIDTH: ", k)
    model = NN(11, 1, k)
    model.to(device)
    fnet, params, buffers = make_functional_with_buffers(model)
    # loss
    loss_fn = torch.nn.MSELoss()
    # optimizer
    # optim = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
    optim = torch.optim.Adam(model.parameters(), lr=args.lr)
    for epoch in range(args.epochs):
        epoch_train_loss, epoch_test_loss = train_from_loader(
            train_loader, validation_loader, model, loss_fn, optim, k - args.start_size, device)
        saved_values["Train_Errors"][k - 1, epoch] = epoch_train_loss
        saved_values["Test_Errors"][k - 1, epoch] = epoch_test_loss
    # NTK approximation
    train_kernel_loss, test_kernel_loss = ntk_prediction(
        train_loader, validation_loader, loss_fn, fnet, params, buffers, device)
    saved_values["Test_kernel_Errors"][k - 1] = test_kernel_loss
    saved_values["Train_kernel_Errors"][k - 1] = train_kernel_loss
The NN class:
class NN(nn.Module):
    def __init__(self, input_size, output_size, width):
        super(NN, self).__init__()
        self.layernorm = nn.LayerNorm(input_size)
        self.linear1 = nn.Linear(input_size, width)
        self.linear2 = nn.Linear(width, width)
        self.linear4 = nn.Linear(width, output_size)

    def forward(self, x):
        x = self.layernorm(x)
        x = self.linear1(x)
        x = nn.functional.relu(x)
        x = self.linear2(x)
        x = nn.functional.relu(x)
        x = self.linear4(x)
        # x = nn.functional.relu(x)
        return x
The NTK linear approximation part uses 100% of the GPU (according to watch nvidia-smi), but the neural network training part only uses 20-30% of the GPU. I guess this is because the model is not very big.
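(To make that concrete, even at the largest width the network only has on the order of a million parameters, which is small by GPU standards; a quick check:)

    model = NN(11, 1, 1299)  # largest width in the sweep
    n_params = sum(p.numel() for p in model.parameters())
    print(n_params)  # roughly 1.7M parameters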
Here is the training function I’m calling:
def train_from_loader(train_loader, test_loader, model, loss_fn, optim, k, device):
    train_loss = 0
    batch_train_loss = 0
    # Train data with nn
    model.train()
    for i, (X_train, Y_train) in enumerate(train_loader):
        X_train, Y_train = X_train.to(device), Y_train.to(device)
        output = model(X_train)
        batch_train_loss = loss_fn(torch.squeeze(output), Y_train)
        optim.zero_grad()
        batch_train_loss.backward()
        optim.step()
        train_loss += batch_train_loss.item()
    epoch_train_loss = train_loss / len(train_loader)

    # Validation data with nn
    loss_test = 0
    batch_val_loss = 0
    model.eval()
    with torch.no_grad():
        for j, (X_test, Y_test) in enumerate(test_loader):
            X_test, Y_test = X_test.to(device), Y_test.to(device)
            output = model(X_test)
            batch_val_loss = loss_fn(torch.squeeze(output), Y_test)
            loss_test += batch_val_loss.item()
    epoch_test_loss = loss_test / len(test_loader)

    return epoch_train_loss, epoch_test_loss
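One thing I suspect is that the per-batch work is too small to saturate the GPU. As a sanity check (just a sketch; the batch size of 4096 is an arbitrary test value, not my real setting), I could time training steps on a batch that is already resident on the GPU:

    import time

    model = NN(11, 1, 1000).to(device)
    X = torch.randn(4096, 11, device=device)  # hypothetical large batch
    Y = torch.randn(4096, device=device)
    loss_fn = torch.nn.MSELoss()
    optim = torch.optim.Adam(model.parameters())

    torch.cuda.synchronize()
    t0 = time.time()
    for _ in range(100):
        out = model(X)
        loss = loss_fn(torch.squeeze(out), Y)
        optim.zero_grad()
        loss.backward()
        optim.step()
    torch.cuda.synchronize()
    print("100 steps on pre-loaded data:", time.time() - t0, "s")

If these steps run with high utilization, then in my real loop the limiting factor would presumably be the DataLoader and the small batches rather than the model itself.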
How can I increase the GPU usage for the training part?