Optimize Code for Supervised Learning

Hi,
I’m programming a neural network that should predict whether a person is sick or healthy. I have 250 samples, each with 5 features plus the resulting state of health. With my current program I reach 88% accuracy. How can I improve it? Here is the code:

import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F


def train(D):
    # Hyper-parameters
    n_steps = 50
    learning_rate = 0.025
    weight_decay = 0.005
    input_size = 13
    output_size = 1
    
    train, test = D[:round(len(D)*1)], D[round(len(D)*0.2):]

    # Traindata
    X = train[:, :-1].astype(np.float32)
    y = train[:, -1].astype(np.float32)
    X_train = torch.from_numpy(X)
    y_train = torch.from_numpy(y)
    feature_means = torch.mean(X_train, dim=0)

    # Testdata
    X = test[:, :-1].astype(np.float32)
    y = test[:, -1].astype(np.float32)
    X_test = torch.from_numpy(X)
    y_test = torch.from_numpy(y)
    
    # Definition of the model
    class Model(nn.Module):
        def __init__(self, input_size):
            super(Model, self).__init__()
            neur = 13
            self.layers = nn.Sequential(
                nn.Linear(input_size, neur),
                nn.BatchNorm1d(neur, momentum = 0.03),
                nn.ReLU(),
                #nn.Dropout(0.01),
                nn.Linear(neur, neur),
                nn.PReLU(),
                nn.Linear(neur, neur),
                nn.ReLU(),
                #nn.Dropout(0.02),
                nn.Linear(neur, neur),
                nn.LogSigmoid(),
                nn.Linear(neur, neur),
                nn.PReLU(),
                nn.Softplus(),
                nn.ReLU(),
                #nn.Dropout(0.05),
                nn.Linear(neur, neur),
                nn.BatchNorm1d(neur, momentum = 0.01),
                nn.PReLU(),
                nn.Linear(neur, neur),
                nn.ReLU(),
                nn.Dropout(0.02),
                nn.Linear(neur, output_size),
            )
        def forward(self, x):
            x = x - feature_means
            out = self.layers(x)
            return out
        
        """def num_flat_features(self, x):
            size = x.size()[1:]
            num = 1
            for i in size:
                num *= i
            return num"""
    
    model = Model(input_size)

    # loss: BCEWithLogitsLoss combines the sigmoid and the binary cross-entropy
    criterion = nn.BCEWithLogitsLoss()

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.85, 0.875), eps=1e-16, weight_decay=weight_decay, amsgrad=False)
    
    # training
    for e in range(n_steps):
        # forward pass
        outputs = model.forward(X_train)[:, 0]  # raw logits from the network
        loss = criterion(outputs, y_train)  # sigmoid + binary cross-entropy (BCEWithLogitsLoss)

        # backward pass (automatically computes gradients)
        optimizer.zero_grad()  # reset gradients (torch accumulates them)
        loss.backward()  # computes gradients
        
        # Learning_rate decay
        #learning_rate -= 0.0001
        
        # L1 regularisation (penalises the absolute size of the weights; note that
        # backward() has already run above, so this only changes the reported loss value,
        # the closure below recomputes everything for the optimizer step)
        l1_crit = nn.L1Loss(reduction='sum')
        reg_loss = 0
        for parameter in model.parameters():
            reg_loss += l1_crit(parameter, torch.zeros_like(parameter))
        factor = 0.00025
        loss += factor * reg_loss
        
        # Adaptive learning rate decay
        lr = learning_rate * (0.75 ** (e // 10))  # decay the learning rate every 10 epochs
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr  # update the optimizer's learning rate

        # Adaptive weight decay
        w_d = weight_decay * (0.5 ** (e // 10))  # halve the weight decay every 10 epochs
        for param_group in optimizer.param_groups:
            param_group['weight_decay'] = w_d  # update the optimizer's weight decay
        
        def closure():
            optimizer.zero_grad()
            output = model.forward(X_train)[:, 0]
            loss = criterion(output, y_train)

            # L1 regularisation: add the penalty before backward() so it shows up in the gradients
            l1_crit = nn.L1Loss(reduction='sum')
            reg_loss = 0
            for parameter in model.parameters():
                reg_loss += l1_crit(parameter, torch.zeros_like(parameter))
            factor = 0.00025
            loss = loss + factor * reg_loss

            loss.backward()
            return loss
        
        # Optimizer-step
        optimizer.step(closure)
        
        # Train accuracy (evaluate in eval mode so Dropout and BatchNorm are frozen)
        model.eval()
        with torch.no_grad():
            outputs = model(X_train)[:, 0]
            pred_y = outputs > 0  # logit > 0 corresponds to a predicted probability > 0.5
            is_correct = torch.eq(pred_y, y_train.bool()).float()
            accuracy_train = torch.mean(is_correct).item()

            # Test accuracy
            outputs = model(X_test)[:, 0]
            pred_y = outputs > 0
            is_correct = torch.eq(pred_y, y_test.bool()).float()
            accuracy_test = torch.mean(is_correct).item()
        model.train()
        print(f"epoch {e}: train accuracy {accuracy_train:.3f}, test accuracy {accuracy_test:.3f}")
    
    return model

We achieved an accuracy between 79% and 86%.
Thank you very much 🙂

Looks like you’re training on the entire dataset and testing on a subset of it. You can’t do that: the test set has to be held out from training, otherwise the accuracy only tells you how well the model fits data it has already seen.
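Something along these lines should give you two disjoint sets. This is just a sketch, assuming D is a NumPy array with the label in the last column (that’s how train() slices it):

import numpy as np

rng = np.random.default_rng(0)                   # fixed seed so the split is reproducible
perm = rng.permutation(len(D))                   # shuffle the row indices once
split = int(0.8 * len(D))                        # 80% train, 20% test
train, test = D[perm[:split]], D[perm[split:]]   # disjoint subsets of the rows

Then fit on train only and report accuracy on test.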

Thanks Michael,
my approach would be

train_size = int(0.8 * len(D))
test_size = len(D) - train_size
train, test = torch.utils.data.random_split(D, [train_size, test_size])

instead of

train, test = D[:round(len(D)*1)], D[round(len(D)*0.2):]

Unfortunately, this doesn’t work when I try to implement it in my program. How could I fix this?
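My guess is that torch.utils.data.random_split returns Subset objects rather than plain NumPy arrays, so the slicing inside train() (train[:, :-1] and so on) no longer works on them. Would something like this be the right way to get arrays back out of the split? (Just a sketch, still assuming D is the NumPy array from before; train_subset and test_subset are only names I made up here.)

train_size = int(0.8 * len(D))
test_size = len(D) - train_size
train_subset, test_subset = torch.utils.data.random_split(D, [train_size, test_size])

# random_split only returns index lists (Subset.indices),
# so pull the corresponding rows back out of D
train = D[train_subset.indices]
test = D[test_subset.indices]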
Thanks for your answer 🙂