Loss Remains Constant

Angry_potato · April 2, 2020, 6:38pm

HI,
I am training a model for neural style transfer,but the loss remains constant.

import torch
import matplotlib.pyplot as plt
import numpy as np
import cv2
import torchvision

class Vgg16(torch.nn.Module):
    def __init__(self):
        super(Vgg16, self).__init__()
        features = list(vgg16(pretrained = True).features.eval())[:23]
        self.features = nn.ModuleList(features).eval() 
        
    def forward(self, x):
        results = []
        for ii,model in enumerate(self.features):
            x = model(x)
            if ii in {3,8,15,22}:
                results.append(x)
        return results

net=Vgg16()
net=net.float()
print(net)

def prod(x):
    j=1
    for i in x:
        j=j*i
    return j

def feature_loss(pred,target):
    loss=(1/prod(pred.shape))*(pred-target).pow(2).sum()
    return loss

def Gram_matrix(inpu):
    p,x,y,z=inpu.shape
    mat=inpu.view(x,y*z)
    G=torch.mm(mat,mat.t())
    return G/prod(inpu.shape)
def style_loss(pred,target):
    loss=(pred-target).pow(2).sum()
    return loss
content_im=cv2.imread('/home/jatin/ML_intern/challenge-master/japanese_garden.jpg')
content_im=cv2.resize(content_im,(224,224))
content_im=np.reshape(content_im,(-1,3,224,224))
content_im=torch.from_numpy(content_im)
content_im=content_im.float()
style_im=cv2.imread('/home/jatin/ML_intern/challenge-master/picasso_selfportrait.jpg')
style_im=cv2.resize(style_im,(224,224))
style_im=np.reshape(style_im,(-1,3,224,224))
style_im=torch.from_numpy(style_im)
style_im=style_im.float()
print(style_im.shape)
inp=torch.randn(1,3,224,224,requires_grad=True)
def get_input_optimizer(input_img):
    optimizer = torch.optim.LBFGS([input_img.requires_grad_()])
    return optimizer
for i in range(10000):
    optimizer=get_input_optimizer(inp)
    def closure():
        inp.data.clamp_(0, 1)
        y_pred=net(inp)
        optimizer.zero_grad()
        loss=0
        for j in range(len(y_pred)):
            if j==1:
                loss=loss+feature_loss(y_pred[j],y_content[j])+style_loss(Gram_matrix(y_pred[j]),Gram_matrix(y_style[j]))
            else:
                loss=loss+style_loss(Gram_matrix(y_pred[j]),Gram_matrix(y_style[j]))
        loss.backward(retain_graph=True)
        return loss
    print('loss '+str(i)+'th iteration=',loss)
    optimizer.step(closure)

ptrblck · April 5, 2020, 7:19am

Could you post dummy tensors to debug the code, please?

Angry_potato · April 5, 2020, 8:53am

Thanks for the reply. I ran the code once again from the beginning then I didn’t encounter this error. Since then the code runs as intended