Tuning Hyperparameters with HyperOpt during Validation

hfdp · May 13, 2020, 4:20pm

Hello,

I am trying to tune the hyperparameters of a CNN that I built. However, I need to tune these hyperparameters (such as learning rate and momentum) during validation (which uses 10% of the entire dataset). This needs to be done using HyperOpt.
Can somebody help me? I am quite new to PyTorch itself.
The code can be seen below, and any help is welcome. Thanks!

The validation step, where the tuning needs to be performed, can be found towards the end of the code:

# -*- coding: utf-8 -*-
"""

"""

from torch.utils import data
from model_architecture import MyModel
from dataloader import Dataset
import numpy as np
import torch.optim as optim
from joblib import Parallel, delayed
import torch
import os
import torch.nn as nn

# Build the network imported from model_architecture and print it.
model = MyModel()

print(model)

#%% Here we train and evaluate the model
        
# Directory (below the current working directory) used for saved weights.
saved_models_path = '{}/models/'.format(os.getcwd())


# Run on the first CUDA device when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")
# cudnn.benchmark = True


# Keyword arguments passed to every DataLoader, plus the training schedule.
params = dict(batch_size=64, shuffle=True, num_workers=6)
max_epochs = 100
core_num = 1
tune_every = 10  # test and validate the model every x epochs

# Holds the tensors of the current batch, keyed by name. `prior` selects the
# entry fed to the model and `target` the entry it is trained to predict.
data_library = {}
prior, target = '20MHz', '250MHz'


# Load the sample IDs (one per line) from the text file.
# The context manager closes the file handle once the lines are read. Empty
# lines are filtered out, so a trailing newline is ignored without dropping
# the last real ID when the file does not end with a newline.
with open('ID_list.txt', 'r') as id_file:
    file_IDs = [line.rstrip('\n') for line in id_file]
file_IDs = [file_id for file_id in file_IDs if file_id]
complete_dataset = Dataset(file_IDs)


# SGD with momentum over all model parameters; the learning rate and the
# momentum are the hyperparameters to be tuned.
optimizer = optim.SGD(
    model.parameters(),
    lr=3e-4,
    momentum=0.1,
)

#%% Here we train the network

# Divide the dataset into training (~80%), validation (~10%) and evaluation
# (~10%) sets. The training size is derived from the other two so the three
# lengths always add up to len(complete_dataset), which random_split requires
# (ceil(0.8 * n) + 2 * floor(0.1 * n) falls short for e.g. n = 15).
total_size = len(complete_dataset)
validation_size = int(np.floor(total_size * 0.1))
evaluation_size = int(np.floor(total_size * 0.1))
training_size = total_size - validation_size - evaluation_size
lengths = [training_size, validation_size, evaluation_size]
training_set, validation_set, evaluation_set = data.random_split(complete_dataset, lengths)

# One DataLoader per subset, all built with the shared `params`.
training_generator = data.DataLoader(training_set, **params)
validation_generator = data.DataLoader(validation_set, **params)
evaluation_generator = data.DataLoader(evaluation_set, **params)


# Cast the model's parameters to double precision and move the model to the
# selected device; train() and val() move every batch to that device, so the
# model has to live there as well.
# NOTE(review): the optimizer is built from model.parameters() before this
# conversion, which relies on Module.double()/.to() updating the parameters
# in place - confirm for the PyTorch version in use.
forward_model = model.double().to(device)

# # check if model works for random instance of the data 
# t20, t100 ,t250 = next(iter(training_generator))
# one_prediction = forward_model(t20)


# Mean-squared-error criterion used by train() and val().
loss_function = nn.MSELoss()


# Make sure every parameter of the model tracks gradients.
for weight in forward_model.parameters():
    weight.requires_grad_(True)

def train(low_res, high_res):
    """Run one optimisation step of the global model on a single batch.

    Both tensors are moved to the global ``device``. ``low_res`` is fed to
    ``forward_model`` and the prediction is compared with ``high_res`` using
    ``loss_function``. The batch loss is printed; nothing is returned.
    """
    inputs = low_res.to(device)
    targets = high_res.to(device)

    # Clear the gradients of the previous step before the new backward pass.
    optimizer.zero_grad()
    batch_loss = loss_function(forward_model(inputs), targets)
    print(batch_loss)

    # Back-propagate the loss, then let the optimizer update the parameters.
    batch_loss.backward()
    optimizer.step()


def val(low_res, high_res):
    """Compute the loss of the global model on a single validation batch.

    Both tensors are moved to the global ``device``. ``low_res`` is fed to
    ``forward_model`` and the prediction is compared with ``high_res`` using
    ``loss_function``.

    The optimizer is deliberately not touched here: no backward pass or
    parameter update happens in this function.

    Returns:
        The batch loss as a Python float, so the caller can aggregate it into
        a validation metric (e.g. as the objective for hyperparameter tuning).
    """
    low_res, high_res = low_res.to(device), high_res.to(device)
    prediction_validation = forward_model(low_res)
    # How far the prediction is from the target for this batch.
    loss = loss_function(prediction_validation, high_res)
    return loss.item()







# Main loop: train on every batch each epoch, and run a validation pass every
# `tune_every` epochs (skipping epoch 0).
for epoch in range(max_epochs):
    print('Epoch : ' + str(epoch))
    # Training
    forward_model.train()
    with torch.set_grad_enabled(True):
        for data_library['20MHz'], data_library['100MHz'], data_library['250MHz'] in training_generator:
            if core_num > 1:
                # Parallel expects an iterable of delayed calls, so the single
                # task is wrapped in a list.
                # NOTE(review): with a process-based joblib backend the worker
                # presumably updates a copy of the model rather than the one in
                # this process - confirm before setting core_num > 1.
                Parallel(n_jobs=core_num)(
                    [delayed(train)(data_library[prior], data_library[target])])
            else:
                train(data_library[prior], data_library[target])

    if epoch % tune_every == 0 and epoch > 0:
        # Validation
        forward_model.eval()
        validation_losses = []
        with torch.set_grad_enabled(False):
            for data_library['20MHz'], data_library['100MHz'], data_library['250MHz'] in validation_generator:
                if core_num > 1:
                    # Validation must call val(), not train(): no parameter
                    # updates may happen on the validation set.
                    batch_losses = Parallel(n_jobs=core_num)(
                        [delayed(val)(data_library[prior], data_library[target])])
                else:
                    batch_losses = [val(data_library[prior], data_library[target])]
                # Only record a loss when val() actually returned one.
                validation_losses.extend(
                    float(batch_loss) for batch_loss in batch_losses
                    if batch_loss is not None)
        if validation_losses:
            # Mean of the per-batch losses collected during this pass.
            mean_validation_loss = sum(validation_losses) / len(validation_losses)
            print('Validation loss : ' + str(mean_validation_loss))
        # HYPERPARAMETER TUNING GOES IN HERE


# Create the output directory if needed so that saving cannot fail on a
# missing folder, then store the trained weights.
os.makedirs(saved_models_path, exist_ok=True)
torch.save(forward_model.state_dict(), os.path.join(saved_models_path, 'conv_net_model.pt'))