I am following this tutorial here. When I try to run the code to generate the output image, I get this error:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 128, 385, 256]], which is output 0 of AddBackward0, is at version 2; expected version 1 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
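Following the hint at the end of the traceback, my understanding is that anomaly detection can be switched on before the optimization runs so autograd reports which forward operation produced the failing tensor. A minimal sketch (I have not folded this into the code below):

torch.autograd.set_detect_anomaly(True)  # report the forward op whose output was later modified in-place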
I am not sure what I am doing wrong or how to correct it. Here is my code:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torchvision.models as models
import copy
import numpy as np
# In[35]:
# Detect whether CUDA is available for GPU training; otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# In[36]:
# Desired size of the output image
imsize = 512 if torch.cuda.is_available() else 256
print(imsize)
# In[37]:
# Helper function
def image_loader(image_name, imsize):
    # Scale the imported image and transform it into a torch tensor
    loader = transforms.Compose([transforms.Resize(imsize), transforms.ToTensor()])
    image = Image.open(image_name)
    # Fake batch dimension required to fit the network's input dimensions
    image = loader(image).unsqueeze(0)
    return image.to(device, torch.float)
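# Note: passing a single int to transforms.Resize scales the shorter edge to
# imsize and keeps the aspect ratio, so non-square inputs stay non-square.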
# Helper function to show the tensor as a PIL image
def imshow(tensor, title=None):
    unloader = transforms.ToPILImage()
    image = tensor.cpu().clone()  # clone so we don't modify the original tensor
    image = image.squeeze(0)      # remove the fake batch dimension before converting
    image = unloader(image)
    plt.imshow(image)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause so that the plots are updated
# In[38]:
# Loading of images
image_directory = './images/'
style_img = image_loader(image_directory + "pb.jpg", imsize)
content_img = image_loader(image_directory + "content.jpg", imsize)
assert style_img.size() == content_img.size(), "we need to import style and content images of the same size"
# In[39]:
plt.ion()
# In[40]:
plt.figure()
imshow(style_img, title='style image')
# In[32]:
plt.figure()
imshow(content_img, title='content image')
# In[58]:
class ContentLoss(nn.Module):
    def __init__(self, target,):
        super(ContentLoss, self).__init__()
        # We detach the target content from the tree used to dynamically
        # compute the gradient: this is a stated value, not a variable.
        # Otherwise the forward method of the criterion would throw an error.
        self.target = target.detach()

    def forward(self, input):
        self.loss = F.mse_loss(input, self.target)
        return input
# This is for the style loss
def gram_matrix(input):
    a, b, c, d = input.size()
    features = input.view(a * b, c * d)
    G = torch.mm(features, features.t())  # compute the gram product
    # We normalize the values of the Gram matrix by dividing by the number
    # of elements in each feature map
    return G.div(a * b * c * d)
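# For example (hypothetical shapes): an input of size [1, 64, 128, 128] gives
# features of shape [64, 16384] and a 64 x 64 Gram matrix G.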
class StyleLoss(nn.Module):
    def __init__(self, target_feature):
        super(StyleLoss, self).__init__()
        self.target = gram_matrix(target_feature).detach()

    def forward(self, input):
        G = gram_matrix(input)
        self.loss = F.mse_loss(G, self.target)
        return input
# In[42]:
# Importing the VGG 19 model as in the paper (here we set it to evaluation mode)
cnn = models.vgg19(pretrained=True).features.to(device).eval()
# In[43]:
# VGG networks are trained on images normalized with special values for the mean and std
cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device)
# In[44]:
# Create a module to normalize the input image so we can easily put it in an nn.Sequential
class Normalization(nn.Module):
    def __init__(self, mean, std):
        super(Normalization, self).__init__()
        # View the mean and std to make them [C x 1 x 1] so that they can
        # work directly with an image tensor of shape [B x C x H x W], where
        # B is the batch size, C the number of channels, H the height, and W the width
        self.mean = torch.tensor(mean).view(-1, 1, 1)
        self.std = torch.tensor(std).view(-1, 1, 1)

    def forward(self, img):
        return (img - self.mean) / self.std
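# e.g. a mean of shape [3] becomes [3, 1, 1] after .view(-1, 1, 1), which
# broadcasts cleanly against an image tensor of shape [1, 3, H, W]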
# In[46]:
# Desired depth layers to compute style/content losses
content_layers_default = ['conv_4']
style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_5']
num_steps = 300
# In[49]:
def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
                               style_img, content_img,
                               content_layers=content_layers_default,
                               style_layers=style_layers_default):
    cnn = copy.deepcopy(cnn)
    # Normalization module
    normalization = Normalization(normalization_mean, normalization_std).to(device)
    # Just in order to have iterable access to the lists of content/style losses
    content_losses = []
    style_losses = []
    # Assuming that cnn is an nn.Sequential, we make a new nn.Sequential
    # to put in modules that are supposed to be activated sequentially
    model = nn.Sequential(normalization)
    i = 0  # increment every time we see a conv layer
    for layer in cnn.children():
        if isinstance(layer, nn.Conv2d):
            i += 1
            name = 'conv_{}'.format(i)
        elif isinstance(layer, nn.ReLU):
            name = 'relu_{}'.format(i)
            # The in-place ReLU does not play nicely with the losses inserted
            # below, so we swap in an out-of-place version
            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = 'pool_{}'.format(i)
        elif isinstance(layer, nn.BatchNorm2d):
            name = 'bn_{}'.format(i)
        else:
            raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__))
        model.add_module(name, layer)
        if name in content_layers:
            # Add the content loss right after this conv layer
            target = model(content_img).detach()
            content_loss = ContentLoss(target)
            model.add_module("content_loss_{}".format(i), content_loss)
            content_losses.append(content_loss)
        if name in style_layers:
            # Add the style loss right after this conv layer
            target_feature = model(style_img).detach()
            style_loss = StyleLoss(target_feature)
            model.add_module("style_loss_{}".format(i), style_loss)
            style_losses.append(style_loss)
    # Now we trim off the layers after the last content and style losses
    for i in range(len(model) - 1, -1, -1):
        if isinstance(model[i], ContentLoss) or isinstance(model[i], StyleLoss):
            break
    model = model[:(i + 1)]
    return model, style_losses, content_losses
# In[60]:
input_img = content_img.clone()
# input_img = torch.randn(content_img.data.size(),device=device)
plt.figure()
imshow(input_img, title="Input image")
# In[61]:
def get_input_optimizer(input_img):
    # The input image is the parameter being optimized, so it must require gradients
    optimizer = optim.LBFGS([input_img.requires_grad_()])
    return optimizer
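# Note: optim.LBFGS re-evaluates the model via the closure several times per
# .step() call, which is why the losses are rebuilt inside closure() below.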
# In[62]:
def run_style_transfer(cnn, normalization_mean, normalization_std,
                       content_img, style_img, input_img, num_steps=300,
                       style_weight=1000000, content_weight=1):
    print('Building the style transfer model...')
    model, style_losses, content_losses = get_style_model_and_losses(cnn,
        normalization_mean, normalization_std, style_img, content_img)
    optimizer = get_input_optimizer(input_img)
    print('Optimizing...')
    run = [0]
    while run[0] <= num_steps:
        def closure():
            # Correct the values of the updated input image
            input_img.data.clamp_(0, 1)
            optimizer.zero_grad()
            model(input_img)
            style_score = 0
            content_score = 0
            for style_layer in style_losses:
                style_score += (1 / 5) * style_layer.loss
            for content_layer in content_losses:
                content_score += content_layer.loss
            style_score *= style_weight
            content_score *= content_weight
            loss = style_score + content_score
            loss.backward()
            run[0] += 1
            if run[0] % 50 == 0:
                print("run {}:".format(run))
                print('Style Loss : {:4f} Content Loss: {:4f}'.format(
                    style_score.item(), content_score.item()))
                print()
            return style_score + content_score
        optimizer.step(closure)
    # A last correction to keep pixel values in [0, 1]
    input_img.data.clamp_(0, 1)
    return input_img
# In[63]:
output = run_style_transfer(cnn, cnn_normalization_mean, cnn_normalization_std,
                            content_img, style_img, input_img, num_steps=num_steps)
plt.figure()
imshow(output, title='Output image')