Hi all, I am trying to compare different optimizers on a neural network (NN). However, the L-BFGS algorithm does not work and I don't know why: the loss is not decreasing and my accuracy is very bad. SGD and Adam do work, so I wonder where my mistake is.
Here is my code:
#Load packages
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
import torch.nn.functional as F

# Load MNIST dataset
# ToTensor() converts each image to a float tensor so the loaders below
# yield (images, labels) batches that can be fed to the network.
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = dsets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

# Make dataset iterable
batch_size = 1000
# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Create model class
class FFN(nn.Module):
    """Fully connected classifier: 28*28 -> 120 -> 84 -> 10 with ReLU in between."""

    def __init__(self):
        # Must be spelled __init__ (with underscores); otherwise the layers
        # below are never created and the module has no parameters.
        super(FFN, self).__init__()
        # Linear functions
        self.fc1 = nn.Linear(28 * 28, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the unnormalized class scores for a batch of flattened inputs.

        ``x`` is passed directly to ``fc1``, so its last dimension must be
        28*28. No softmax is applied here; the output of ``fc3`` is returned
        as-is.
        """
        # Non-linearity, can be changed to Tanh, ReLU
        out = F.relu(self.fc1(x))
        out = F.relu(self.fc2(out))
        # Linear function (readout)
        out = self.fc3(out)
        return out
# Instantiate Model and Optimizer
model = FFN()
# CrossEntropyLoss is applied to the raw outputs of the model.
criterion = nn.CrossEntropyLoss()
# NOTE(review): LBFGS is created with the library defaults. Per the PyTorch
# docs these are lr=1, max_iter=20 and no line search, so every step() call
# runs up to 20 inner iterations on a single mini-batch with a fixed step
# size. If the loss stalls, a smaller lr or line_search_fn='strong_wolfe'
# (newer PyTorch releases) is worth trying - TODO confirm against the
# installed version.
optimizer = torch.optim.LBFGS(model.parameters())

# Train Model
epochs = 5
for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every image to a 28*28 vector, the input size of the model.
        # (The Variable wrapper is not needed on PyTorch >= 0.4, where
        # tensors track gradients themselves.)
        images = images.view(-1, 28 * 28)

        def closure():
            """Re-evaluate the loss on the current batch; LBFGS calls this repeatedly."""
            # Clear gradients so they are not accumulated
            optimizer.zero_grad()
            # Forward pass to get output
            outputs = model(images)
            # Calculate loss: softmax + cross entropy loss
            loss = criterion(outputs, labels)
            # Get gradients
            loss.backward()
            return loss

        # Update parameters. `loss` inside the closure is local to it, so the
        # value has to be taken from step(), which returns the closure's loss.
        loss = optimizer.step(closure)
        # .item() extracts the Python number from the 0-dim loss tensor.
        print('Epoch: {}, Loss: {}'.format(epoch, loss.item()))

        # NOTE(review): with batch_size=1000 an epoch of MNIST training data
        # has fewer than 100 batches, so this condition presumably never
        # fires - confirm the intended evaluation interval.
        if (i + 1) % 100 == 0:
            # Calculate accuracy on testset
            correct = 0
            total = 0
            # No gradients are needed for evaluation.
            with torch.no_grad():
                # Separate names so the training batch read by the closure
                # is not overwritten by test data.
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, 28 * 28)
                    # Forward pass only to get output
                    outputs = model(test_images)
                    # Get prediction: index of the largest score per sample
                    _, predicted = torch.max(outputs, 1)
                    # Total number of labels
                    total += test_labels.size(0)
                    # Total correct predictions, kept as a plain Python int
                    correct += (predicted == test_labels).sum().item()
            # Float arithmetic so the percentage is not truncated.
            accuracy = 100.0 * correct / total
            print('Epoch: {}, Loss: {}, Accuracy on testset: {}'.format(
                epoch, loss.item(), accuracy))