Hi Simon,
Yes, sure, sorry, I should have done that earlier. It's just a GAN generator that maps 16x16 input noise to a 256x256 output image; I train it with L1Loss only to show you the issue (sorry if the code is messy, I am a beginner). Here you go:
import numpy as np
import torch
import torch.nn as nn
from torch import optim
from torch.nn.utils import spectral_norm
from torch.utils import data
from torch.utils.data import DataLoader
from tqdm import tqdm
class Generator(nn.Module):
    """Upsampling generator mapping a 1-channel noise map to a 1-channel image.

    The network stacks ``n_up`` blocks named ``upconv1`` .. ``upconv{n_up}``.
    Each block is a spectrally normalised ``ConvTranspose2d`` (kernel 4,
    stride 2, padding 1, i.e. it doubles the spatial size), followed by
    ``BatchNorm2d`` and ``ReLU``. The channel count starts at ``conv_dim``
    and is halved after every block.

    Output heads ``conv{k}`` (1x1 ``Conv2d`` to one channel followed by
    ``Tanh``) are registered for the last three levels, but ``forward`` only
    uses the head of the final level. The other heads are kept so that the
    parameter names in ``state_dict`` stay the same.

    Args:
        image_size: Target spatial size of the generated image.
        z_size: Spatial size of the input noise map.
        conv_dim: Number of output channels of the first upsampling block.

    Raises:
        ValueError: If ``image_size / z_size`` is smaller than 2, i.e. no
            upsampling block would be created.
    """

    def __init__(self, image_size=64, z_size=16, conv_dim=64):
        super().__init__()
        # Number of x2 upsampling steps; int() truncates when the ratio is
        # not an exact power of two.
        self.n_up = int(np.log2(image_size / z_size))
        if self.n_up < 1:
            # Without any level there is no output head and forward() could
            # not produce an image, so fail early with a clear message.
            raise ValueError(
                "image_size must be at least twice z_size, got "
                "image_size={} and z_size={}".format(image_size, z_size)
            )

        curr_channel = 1
        out_channels = conv_dim
        for i in range(self.n_up):
            level = str(i + 1)
            # add_module is the public way to register a named submodule;
            # it fills the same registry the original code wrote to directly.
            self.add_module(
                "upconv" + level,
                nn.Sequential(
                    spectral_norm(
                        nn.ConvTranspose2d(curr_channel, out_channels, 4, 2, 1)
                    ),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(inplace=False),
                ),
            )
            curr_channel = out_channels
            out_channels = curr_channel // 2
            if i >= self.n_up - 3:
                self.add_module(
                    "conv" + level,
                    nn.Sequential(
                        nn.Conv2d(curr_channel, 1, 1, 1, 0),
                        nn.Tanh(),
                    ),
                )

    def forward(self, z):
        """Run every upsampling block in order, then the final output head.

        Args:
            z: Input batch fed to ``upconv1`` (which expects one channel).

        Returns:
            The output of the ``conv{n_up}`` head applied to the features of
            the last upsampling block.
        """
        for i in range(self.n_up):
            z = getattr(self, "upconv" + str(i + 1))(z)
        return getattr(self, "conv" + str(self.n_up))(z)
class DatasetTest(data.Dataset):
    """Dataset pairing each entry of ``data`` with the entry of ``target`` at the same index."""

    def __init__(self, data, target):
        """Store the inputs and targets; both are indexed along their first axis."""
        self.data, self.target = data, target

    def __len__(self):
        """Return the size of the first dimension of ``data``."""
        n_samples = self.data.size(0)
        return n_samples

    def __getitem__(self, index):
        """Return the sample at ``index`` as a dict with keys ``"data"`` and ``"target"``."""
        sample = {}
        sample["data"] = self.data[index]
        sample["target"] = self.target[index]
        return sample
# Minimal reproduction: train the generator on random data with an L1 loss,
# wrapped in nn.DataParallel (the configuration that triggers the error).
inp = torch.rand(100, 1, 16, 16)
tar = torch.rand(100, 1, 256, 256)
# Named `dataset` (not `data`) so the imported `torch.utils.data` module is
# not rebound to a dataset instance.
dataset = DatasetTest(inp, tar)
loader = DataLoader(dataset=dataset, batch_size=10, shuffle=True)

gen = Generator(image_size=256, z_size=16, conv_dim=64).cuda()
# The optimizer is built from the bare module's parameters; DataParallel
# wraps the same module object, so these are the parameters being trained.
optimizer = optim.Adam(
    [p for p in gen.parameters() if p.requires_grad], lr=0.0004
)
# The module is already on the GPU, so no second .cuda() call is needed.
gen = nn.DataParallel(gen)
crit = nn.L1Loss()

for batch in tqdm(loader):
    # Separate per-batch names keep the full `inp` / `tar` tensors intact.
    batch_inp = batch["data"].cuda()
    batch_tar = batch["target"].cuda()
    # Clear gradients from the previous step; otherwise backward() keeps
    # accumulating into .grad across batches.
    optimizer.zero_grad()
    out = gen(batch_inp)
    loss = crit(out, batch_tar)
    loss.backward()
    optimizer.step()
As I said earlier, the spectral normalization is just a copy-paste of the available version of the source code, without any changes, so if you are on 0.4.1 please add:
from torch.nn.utils.spectral_norm import *
at the beginning of this code.
After investigation, it turns out that everything works if I don't use data parallelism. But if I do, I get the following error:
File "brain_anomaly_detection/models/spectral_norm.py", line 185, in <module>
loss.backward()
File "/home/joutars/anaconda2/envs/py36/lib/python3.6/site-packages/torch/tensor.py", line 93, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "/home/joutars/anaconda2/envs/py36/lib/python3.6/site-packages/torch/autograd/__init__.py", line 89, in backward
allow_unreachable=True) # allow_unreachable flag
RuntimeError: Tensor: invalid storage offset at /pytorch/aten/src/THC/generic/THCTensor.c:759
Any idea?
Thanks