Multinomial logit - loss oscillates

Hi everyone,

I am relatively new to PyTorch. I am trying to estimate the parameters of a multinomial logit. I use PyTorch because I have a lot of observations and my real problem is rather complex. I was getting strange results, so I decided to test my code on a really simple logit problem with simulated data. However, even on this simulated data and this totally simple problem, my loss still oscillates and my weight does not converge to the true value used in the simulation… I tested the same data with Stata, a statistical software package, and it works perfectly fine (Stata finds the true value in less than one second), so I know the problem does not come from the simulated data.

Here is my code:

import pandas as pd
import numpy as np
import os
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam, SGD, ASGD, lr_scheduler
from time import time
from tqdm.autonotebook import tqdm, trange
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter

# Hyperparameters

LEARNING_RATE = 0.1
OPTIMIZER_CONSTRUCTOR = SGD

BATCH_SIZE = 32
WORKERS = 0

path = […]

class RebateData(Dataset):
    def __init__(self, file):
        super().__init__()
        # Each row of the CSV is one observation; the first column is the chosen alternative.
        self.file = torch.tensor(pd.read_csv(file).values, dtype=int)

    def __getitem__(self, i):
        typeo = self.file[i]
        return [typeo]

    def __len__(self):
        return len(self.file)

class Model(nn.Module):

    def __init__(self):
        super().__init__()
        self.dwelling_bias = nn.Parameter(torch.tensor([-0.5], dtype=torch.float))

    def forward(self, typeo):
        # Utilities (0, dwelling_bias), broadcast to one row per observation
        utility = (torch.cat([torch.zeros(1), self.dwelling_bias])[None, :] * torch.ones(typeo.size()[0])[:, None])[:, :, None]
        prob = F.softmax(utility, dim=1)
        # Probability the model assigns to each observation's actual choice
        avg_prob_choice = prob[torch.arange(len(typeo[:, 0])), typeo[:, 0], :].mean(-1)
        # Mean negative log-likelihood
        return -torch.log(avg_prob_choice).mean()

dataset = RebateData(file=path + "try.csv")
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=WORKERS,
                        shuffle=True, pin_memory=True)
model = Model()

optimizer = OPTIMIZER_CONSTRUCTOR(model.parameters(), lr=LEARNING_RATE)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'max', verbose=True, patience=1)
t = 0

log_name = f"{LEARNING_RATE}, {OPTIMIZER_CONSTRUCTOR.__name__}"
runs = r"C:\Users\l\Desktop\runs"
writer = SummaryWriter(log_dir=f"{runs}{log_name}")

try:
    for epoch in range(10):
        for batch in dataloader:
            optimizer.zero_grad()
            loss = model(*batch)
            loss.backward()
            print(model.dwelling_bias.grad)
            optimizer.step()
            scheduler.step(loss)
            print("lr", epoch, optimizer.param_groups[0]['lr'])
            if optimizer.param_groups[0]['lr'] < 0.001:
                raise StopIteration
            t += 1
            # if t % 20 == 0:
            #     print("dwelling type : ancien ", model.dwelling_bias.detach().numpy())
            #     print("sigma", model.sigma.detach().numpy())
            #     print("loss ", loss)
            #     writer.add_scalar('sigma', model.sigma.detach().numpy(), global_step=t)
            #     print(writer)
            writer.add_scalar('dwelling type : ancien', model.dwelling_bias.detach().numpy(), global_step=t)
            writer.add_scalar('learning rate', optimizer.param_groups[0]['lr'], global_step=t)
            writer.add_scalar('loss', loss, global_step=t)

except StopIteration:
    pass
writer.close()

The true value of the dwelling bias in my data is -1, but PyTorch can't find it.

If anyone can help me with this, thanks a lot in advance…

This is an old post and the code is a bit hard to read in this format, but a few things stand out. First, oscillations in a model this simple usually come from a learning rate that is too large: with LEARNING_RATE = 0.1 and batches of 32, each SGD step follows a noisy batch gradient, so the parameter bounces around the optimum instead of settling onto it. Second, the scheduler is working against you: ReduceLROnPlateau is constructed with mode 'max', but you are minimizing the loss, so it should be 'min'. It is also stepped once per batch with patience=1, so a couple of noisy batch losses in a row is enough to cut the learning rate; after a few cuts the lr < 0.001 check raises StopIteration and training stops long before the weight has moved far from its starting value. Try a smaller learning rate, mode='min', and stepping the scheduler once per epoch on the epoch's loss, as in the sketch below.
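Here is a minimal self-contained sketch of what I mean. It simulates the data in-script (so it does not need your CSV), fits the same one-parameter model with full-batch gradient steps (the gradient is then exact, so there is no batch noise to oscillate on), and steps a mode='min' scheduler once per epoch. The learning rate, epoch count, and patience below are illustrative, not tuned:

import torch
from torch import nn
from torch.nn import functional as F
from torch.optim import SGD, lr_scheduler

torch.manual_seed(0)

# Simulate 10,000 choices from a two-alternative logit with true bias -1,
# i.e. utilities (0, -1), so P(alternative 1) = exp(-1) / (1 + exp(-1)) ≈ 0.27.
true_utilities = torch.tensor([0.0, -1.0])
probs = F.softmax(true_utilities, dim=0)
choices = torch.multinomial(probs.expand(10_000, 2), num_samples=1).squeeze(1)

bias = nn.Parameter(torch.tensor([-0.5]))
optimizer = SGD([bias], lr=0.5)  # full batch, so a step this large is stable here
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=20)

for epoch in range(300):
    optimizer.zero_grad()
    utility = torch.cat([torch.zeros(1), bias]).expand(len(choices), 2)
    loss = F.cross_entropy(utility, choices)  # mean negative log-likelihood
    loss.backward()
    optimizer.step()
    scheduler.step(loss.item())  # one scheduler step per epoch, on the full-data loss

print(bias.item())  # ends up close to -1 (up to sampling noise in the simulation)

Note that F.cross_entropy applies log-softmax internally, which is also numerically safer than taking softmax and then log separately as in your forward.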
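As a separate sanity check: with utilities (0, b) and no covariates, this model's maximum-likelihood estimate has a closed form, b̂ = log(n1 / n0), the log odds of choosing alternative 1. Assuming your CSV has the layout your Dataset implies (the choice in the first column, coded 0/1), a couple of lines give you the value the optimizer should converge to:

import numpy as np
import pandas as pd

choices = pd.read_csv(path + "try.csv").values[:, 0]  # first column holds the choice
n1, n0 = np.sum(choices == 1), np.sum(choices == 0)
print(np.log(n1 / n0))  # closed-form MLE of dwelling_bias; should be near -1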