Tuning Hyperparameters with HyperOpt during Validation

Hello,

I am trying to tune the hyperparameters (such as learning rate and momentum) of a CNN that I built. The tuning needs to happen during validation (which uses 10% of the entire dataset), and it needs to be done using HyperOpt.
Could somebody help me? I am quite new to PyTorch itself.
The code can be seen below and any help is welcome, thanks :smiley:

The validation where the tuning needs to be performed can be found towards the end of the code :slight_smile:

# -*- coding: utf-8 -*-
"""

"""

from torch.utils import data
from model_architecture import MyModel
from dataloader import Dataset
import numpy as np
import torch.optim as optim
from joblib import Parallel, delayed
import torch
import os
import torch.nn as nn

model = MyModel()

print(model)

#%% Here we train and evaluate the model
        
saved_models_path = os.getcwd() + '/models/'


# Check if CUDA is available and pick the device for all tensors/models.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
# cudnn.benchmark = True


# Set training parameters
params = {'batch_size': 64,
          'shuffle': True,
          'num_workers': 6}
max_epochs = 100
core_num = 1        # joblib workers (1 = run serially, no Parallel)
tune_every = 10     # test and validate the model every x epochs

data_library = {}
prior = '20MHz'     # key of the low-resolution model input
target = '250MHz'   # key of the high-resolution target


# Load all the sample IDs from the txt file; a context manager guarantees
# the file handle is closed even if reading fails.
with open('ID_list.txt', 'r') as id_file:
    file_IDs = id_file.read().split('\n')
file_IDs = file_IDs[:-1]  # remove trailing empty line
complete_dataset = Dataset(file_IDs)


# create your optimizer
optimizer = optim.SGD(model.parameters(), lr=0.0003, momentum=0.1)

#%% Here we train the network

# Divide the dataset into training (~80%), validation (~10%) and
# evaluation (~10%) subsets.  The training length is computed as the
# remainder so the three lengths always sum to len(complete_dataset):
# ceil(0.8*n) + floor(0.1*n) + floor(0.1*n) can exceed or undershoot n
# (e.g. n = 17 gives 14 + 1 + 1 = 16), which makes random_split raise.
n_total = len(complete_dataset)
n_validation = int(np.floor(n_total * 0.1))
n_evaluation = int(np.floor(n_total * 0.1))
lengths = [n_total - n_validation - n_evaluation, n_validation, n_evaluation]
training_set, validation_set, evaluation_set = torch.utils.data.random_split(complete_dataset, lengths)
training_generator = data.DataLoader(training_set, **params)
validation_generator = data.DataLoader(validation_set, **params)
evaluation_generator = data.DataLoader(evaluation_set, **params)


# Cast the model parameters to double precision so they match the
# double-precision data tensors.
forward_model = model.double()

# # check if model works for random instance of the data 
# t20, t100 ,t250 = next(iter(training_generator))
# one_prediction = forward_model(t20)


loss_function = nn.MSELoss()


# requires_grad is True by default for model parameters; made explicit here.
for param in forward_model.parameters():
    param.requires_grad = True

def train(low_res, high_res):
    """Run one optimisation step on a single mini-batch.

    Parameters
    ----------
    low_res : torch.Tensor
        Low-resolution input batch fed to the model.
    high_res : torch.Tensor
        High-resolution target batch.

    Returns
    -------
    float
        The training loss for this batch, so callers can monitor or log it
        (previously the value was only printed and then discarded).
    """
    global forward_model, optimizer
    low_res, high_res = low_res.to(device), high_res.to(device)
    optimizer.zero_grad()
    prediction_training = forward_model(low_res)
    # MSE between the model output and the high-resolution target.
    loss = loss_function(prediction_training, high_res)
    print(loss)
    # Backpropagate: every tensor in the graph with requires_grad=True has
    # its .grad accumulated with the gradient of the loss.
    loss.backward()
    optimizer.step()
    return loss.item()


def val(low_res, high_res):
    """Compute the validation loss for a single mini-batch.

    Unlike ``train`` this performs no optimisation step: it only measures
    how far the model output is from the target.  The loss is *returned*
    (the original version computed it and discarded it), so a
    hyperparameter-tuning objective (e.g. HyperOpt's ``fmin``) can minimise
    the aggregated validation loss.

    Parameters
    ----------
    low_res : torch.Tensor
        Low-resolution input batch fed to the model.
    high_res : torch.Tensor
        High-resolution target batch.

    Returns
    -------
    float
        The validation loss for this batch.
    """
    global forward_model, optimizer
    low_res, high_res = low_res.to(device), high_res.to(device)
    # No optimizer.zero_grad() here: there is no backward pass during
    # validation, so zeroing gradients is unnecessary.
    prediction_validation = forward_model(low_res)
    loss = loss_function(prediction_validation, high_res)
    return loss.item()







for epoch in range(max_epochs):
    print('Epoch : ' + str(epoch))
    # ---- Training ----
    forward_model.train()
    with torch.set_grad_enabled(True):
        for data_library['20MHz'], data_library['100MHz'], data_library['250MHz'] in training_generator:
            if core_num > 1:
                # joblib's Parallel expects an *iterable* of delayed calls,
                # not the result of calling delayed(f)(args) directly.
                Parallel(n_jobs=core_num)(
                    [delayed(train)(data_library[prior], data_library[target])])
            else:
                train(data_library[prior], data_library[target])

    # ---- Validation, every `tune_every` epochs (skipping epoch 0) ----
    if epoch % tune_every == 0 and epoch > 0:
        forward_model.eval()
        val_losses = []  # per-batch validation losses for the tuning objective
        with torch.set_grad_enabled(False):
            for data_library['20MHz'], data_library['100MHz'], data_library['250MHz'] in validation_generator:
                if core_num > 1:
                    # BUG FIX: the original called train() here, which would
                    # keep updating the weights during validation.
                    val_losses.extend(
                        Parallel(n_jobs=core_num)(
                            [delayed(val)(data_library[prior], data_library[target])]))
                else:
                    val_losses.append(val(data_library[prior], data_library[target]))
            # HYPERPARAMETER TUNING GOES IN HERE — e.g. feed the mean of
            # val_losses to hyperopt.fmin as the objective value.


# Join the directory and filename properly; the original passed a single
# pre-concatenated string to os.path.join, which is a no-op.
torch.save(forward_model.state_dict(), os.path.join(saved_models_path, 'conv_net_model.pt'))