Hi, I have created a UNet model from this GitHub repository. I am trying to train the model using images of shape [1, 3, 320, 320], but the output of the model has shape [1, 2, 320, 320]. I am using
torch.nn.L1Loss()
as the loss function. However, when I try to compare the target and output images, the loss function raises a shape error.
When I tried to just save the output with
utils.save_image
it raised another error, about LA mode. I think the output of the model is in LA mode, but I want it in RGB. How should I convert the output to RGB, both for the comparison and for saving the output as images?
the model:
class UNet(nn.Module):
    """U-Net style encoder/decoder assembled from ``unetConv2`` and ``unetUp`` blocks.

    The network has four downsampling stages, a centre block, four
    upsampling stages that each receive the matching encoder feature map,
    and a final 1x1 convolution producing ``n_classes`` output channels.

    Args:
        in_channels: Number of channels of the input tensor.
        n_classes: Number of channels of the output tensor. An element-wise
            loss (e.g. ``nn.L1Loss``) against a 3-channel target therefore
            needs ``n_classes=3``; with the default of 2 the channel
            dimensions do not match.
        feature_scale: Divisor applied to the base channel widths
            ``[64, 128, 256, 512, 1024]``.
        is_deconv: Forwarded to every ``unetUp`` block (presumably selects
            transposed convolution vs. interpolation -- confirm in ``unetUp``).
        is_batchnorm: Forwarded to every ``unetConv2`` block (presumably
            toggles batch normalisation -- confirm in ``unetConv2``).
    """

    def __init__(self, in_channels=3, n_classes=2, feature_scale=2, is_deconv=True, is_batchnorm=True):
        super().__init__()
        self.in_channels = in_channels
        self.feature_scale = feature_scale
        self.is_deconv = is_deconv
        self.is_batchnorm = is_batchnorm

        # Channel width of each stage; with the default feature_scale=2 this
        # is [32, 64, 128, 256, 512].
        filters = [int(width / self.feature_scale) for width in (64, 128, 256, 512, 1024)]

        # downsampling
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.conv1 = unetConv2(self.in_channels, filters[0], self.is_batchnorm)
        self.conv2 = unetConv2(filters[0], filters[1], self.is_batchnorm)
        self.conv3 = unetConv2(filters[1], filters[2], self.is_batchnorm)
        self.conv4 = unetConv2(filters[2], filters[3], self.is_batchnorm)
        self.center = unetConv2(filters[3], filters[4], self.is_batchnorm)

        # upsampling
        self.up_concat4 = unetUp(filters[4], filters[3], self.is_deconv)
        self.up_concat3 = unetUp(filters[3], filters[2], self.is_deconv)
        self.up_concat2 = unetUp(filters[2], filters[1], self.is_deconv)
        self.up_concat1 = unetUp(filters[1], filters[0], self.is_deconv)

        # final conv (without any concat): 1x1 projection to n_classes channels
        self.final = nn.Conv2d(filters[0], n_classes, 1)

        # initialise weights of every convolution and batch-norm layer
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.BatchNorm2d)):
                init_weights(m, init_type='kaiming')

    def forward(self, inputs):
        """Run the encoder/decoder and return the ``n_classes``-channel output.

        The shape comments below give channel counts as ``filters[i]`` (see
        ``__init__``) for an input of spatial size H x W. They assume
        ``unetConv2`` keeps the spatial size and ``unetUp`` doubles it --
        TODO confirm against those helpers.
        """
        # encoder: each max-pool downsamples the spatial size by 2
        conv1 = self.conv1(inputs)            # filters[0] x H x W
        maxpool1 = self.maxpool(conv1)        # filters[0] x H/2 x W/2
        conv2 = self.conv2(maxpool1)          # filters[1] x H/2 x W/2
        maxpool2 = self.maxpool(conv2)        # filters[1] x H/4 x W/4
        conv3 = self.conv3(maxpool2)          # filters[2] x H/4 x W/4
        maxpool3 = self.maxpool(conv3)        # filters[2] x H/8 x W/8
        conv4 = self.conv4(maxpool3)          # filters[3] x H/8 x W/8
        maxpool4 = self.maxpool(conv4)        # filters[3] x H/16 x W/16

        center = self.center(maxpool4)        # filters[4] x H/16 x W/16

        # decoder: each stage also receives the matching encoder feature map
        up4 = self.up_concat4(center, conv4)  # filters[3] x H/8 x W/8
        up3 = self.up_concat3(up4, conv3)     # filters[2] x H/4 x W/4
        up2 = self.up_concat2(up3, conv2)     # filters[1] x H/2 x W/2
        up1 = self.up_concat1(up2, conv1)     # filters[0] x H x W

        return self.final(up1)                # n_classes x H x W
Note that nothing changes if I set n_classes to 3 in the first line.
error
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\Anaconda3\envs\torchgpu\lib\site-packages\PIL\JpegImagePlugin.py in _save(im, fp, filename)
619 try:
--> 620 rawmode = RAWMODE[im.mode]
621 except KeyError:
KeyError: 'LA'
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
in
41
42 if epoch % 10 == 0:
---> 43 utils.save_image(out, os.path.join(result_path, str(i) + '_' + str(item) +'.jpg'), nrow=3, normalize=True)
44 epoch_loss = np.average(np.array(items_losses))
45 print("Epoch number: {} and meanloss value is: {} ".format(epoch, epoch_loss))
C:\Anaconda3\envs\torchgpu\lib\site-packages\torchvision\utils.py in save_image(tensor, filename, nrow, padding, normalize, range, scale_each, pad_value)
103 ndarr = grid.mul_(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
104 im = Image.fromarray(ndarr)
--> 105 im.save(filename)
C:\Anaconda3\envs\torchgpu\lib\site-packages\PIL\Image.py in save(self, fp, format, **params)
1992
1993 try:
-> 1994 save_handler(self, fp, filename)
1995 finally:
1996 # do what we can to clean up
C:\Anaconda3\envs\torchgpu\lib\site-packages\PIL\JpegImagePlugin.py in _save(im, fp, filename)
620 rawmode = RAWMODE[im.mode]
621 except KeyError:
--> 622 raise IOError("cannot write mode %s as JPEG" % im.mode)
623
624 info = im.encoderinfo
OSError: cannot write mode LA as JPEG
error of comparison:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
in
35 #target = trf(target)
36 out = model(original)
---> 37 loss_value = criterion_pixelwise(out, target)
38 #loss_value.backward()
39 #optimizer.step()
C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
89 @weak_script_method
90 def forward(self, input, target):
---> 91 return F.l1_loss(input, target, reduction=self.reduction)
92
93
C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\functional.py in l1_loss(input, target, size_average, reduce, reduction)
2228 ret = torch.mean(ret) if reduction == 'mean' else torch.sum(ret)
2229 else:
-> 2230 expanded_input, expanded_target = torch.broadcast_tensors(input, target)
2231 ret = torch._C._nn.l1_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
2232 return ret
C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\functional.py in broadcast_tensors(*tensors)
60 [0, 1, 2]])
61 """
---> 62 return torch._C._VariableFunctions.broadcast_tensors(tensors)
63
64
RuntimeError: The size of tensor a (2) must match the size of tensor b (3) at non-singleton dimension 1
Note that the output cannot simply be set to 3 channels: when I change the UNet's n_classes to 3, the outputs are completely black images. The output should be converted to the RGB color space.
Can you help me?