I am sure there are no random transformations involved, since I am not applying any transformations to my input data (i.e. `img`). It is a simple `numpy.ndarray` of shape [2, 58, 72] that is passed to `torch.utils.data.DataLoader` without going through any transformations. Also, printing out a small slice of `img` in the loop above always yields the same pixel values no matter how many times I run the loop/cell (which is what I expect). I am not sure whether it matters, but the dataset used for training the model is a set of `numpy.ndarray`s — I did not convert them to PyTorch tensors!
I do agree that this has to be the case, and I would like to draw your attention to the fact that printing the autograd status of the model's layers — iterating over `.named_parameters()` and inspecting `.requires_grad` — shows `requires_grad=True`. Given that the whole block is inside `with torch.no_grad():`, I expected `requires_grad=False`! Is this expected behaviour? Could it be that gradient tracking is still active?
The input used for training has the shape [2, 58, 72], where 2, 58 and 72 are the number of channels, the height, and the width respectively.
import numpy
import torch
import torch.nn as nn
import torch.optim
import torchvision
class Input(nn.Module):
    """First stage of the network: three Conv2d -> BatchNorm2d -> LeakyReLU layers
    that adjust the dimensions of the low-resolution input.

    forward() also computes a bicubic interpolation of the raw input to (144, 180),
    which DeepTest adds back at the very end (global skip connection).
    """

    def __init__(self, in_ch=(2, 64, 64), out_ch=(64, 64, 64),
                 kernel=((4, 5), (3, 5), (3, 5)),
                 pad=((4, 0), (1, 0), (1, 0)),
                 stride=((2, 1), (1, 1), (1, 1))):
        # super().__init__() must run before assigning attributes on an nn.Module;
        # defaults are tuples (not lists) to avoid the mutable-default pitfall.
        super().__init__()
        self.input_ch = in_ch
        # BUG FIX: the original passed `padding = [i]` (the loop index wrapped in a
        # list) instead of the intended per-layer padding `pad[i]`.
        self.convList = nn.ModuleList([
            nn.Conv2d(in_ch[i], out_ch[i], kernel_size=kernel[i],
                      stride=stride[i], padding=pad[i])
            for i in range(len(kernel))
        ])
        self.batchList = nn.ModuleList([nn.BatchNorm2d(c) for c in out_ch])
        self.active = nn.LeakyReLU()

    def forward(self, lowOut):
        # Bicubic interpolation of the raw input; used at the very end of the model.
        bicubicInterpol = nn.functional.interpolate(lowOut, size=(144, 180), mode="bicubic")
        # NOTE(review): removed the hard-coded `.cuda("cuda:0")` calls — device
        # placement is the caller's job (`model.to(device)` / `img.to(device)`);
        # the hard-coding also broke CPU-only runs.
        for count in range(len(self.input_ch)):
            lowOut = self.convList[count](lowOut)
            lowOut = self.batchList[count](lowOut)
            lowOut = self.active(lowOut)
        return lowOut, bicubicInterpol
class Upscale(nn.Module):
    """Upsample an input tensor to fixed target spatial dimensions using bilinear
    interpolation followed by a 3x3 convolution."""

    def __init__(self, targetFeature=(64, 144, 180)):
        super().__init__()
        self.firstUpsample = nn.Conv2d(64, 64, 3, padding=1)
        # targetFeature = (channels, height, width); only H and W are used in forward.
        self.targetFeature = targetFeature

    def forward(self, inputT):
        # Removed four unused locals (input/target channel and size reads) from the
        # original; only the target H/W are needed for the interpolation.
        target_h = self.targetFeature[1]
        target_w = self.targetFeature[2]
        interpolated = nn.functional.interpolate(inputT, size=(target_h, target_w), mode="bilinear")
        return self.firstUpsample(interpolated)
class Residual(nn.Module):
    """Residual block: a 3x3 same-channel convolution applied three times, with the
    block input added back at the end (skip connection)."""

    def __init__(self, sameChannel=128):  # default 128 (first layer of the Encoding)
        super().__init__()
        # BUG FIX: removed the hard-coded `.cuda('cuda:0')` — it broke CPU-only runs
        # and device placement belongs to the caller (`model.to(device)`).
        # NOTE(review): forward() applies this SAME conv three times, so all three
        # applications share one set of weights — confirm that is intended; residual
        # blocks usually use three distinct conv layers.
        self.resConv = nn.Conv2d(sameChannel, sameChannel, 3, padding=1)

    def forward(self, inputT):
        # Removed three unused shape locals from the original.
        out = self.resConv(inputT)
        out = self.resConv(out)
        out = self.resConv(out)
        return out + inputT
class Encoding(nn.Module):
    """Encoder: six strided Conv2d -> BatchNorm2d -> LeakyReLU -> Residual stages.

    Returns the final feature map plus the list of pre-stage activations, which the
    Decoding stage consumes as skip connections.
    """

    def __init__(self, channel=(64, 128, 192, 256, 320, 384, 448),
                 stride=((2, 1), (1, 3), (2, 1), (3, 3), (2, 2), (2, 2))):
        super().__init__()
        self.channel = channel
        self.enConvList = nn.ModuleList([
            nn.Conv2d(channel[i], channel[i + 1], 3, stride=stride[i], padding=1)
            for i in range(len(channel) - 1)
        ])
        self.batchList = nn.ModuleList([nn.BatchNorm2d(channel[j + 1])
                                        for j in range(len(channel) - 1)])
        # BUG FIX: the residual blocks used to be constructed INSIDE forward(), so
        # every call built fresh randomly-initialised conv layers that were never
        # registered on the module — invisible to model.parameters() and therefore
        # never trained by the optimizer. Build them once here as sub-modules.
        self.resList = nn.ModuleList([Residual(channel[k + 1])
                                      for k in range(len(channel) - 1)])
        self.active = nn.LeakyReLU()

    def forward(self, out):
        concatList = []  # pre-stage activations, used as skip connections by Decoding
        for i in range(len(self.channel) - 1):
            concatList.append(out)
            out = self.enConvList[i](out)  # strided convolution block
            out = self.batchList[i](out)   # batch normalization block
            out = self.active(out)         # activation block
            out = self.resList[i](out)     # residual block (registered, trainable)
        return out, concatList
class Decoding(nn.Module):
    """Decoder: six (bilinear upscale -> conv -> concat encoder skip -> conv ->
    Residual) stages, followed by a final 3x3 convolution down to `finalOut`
    channels."""

    def __init__(self, upChannel_in=(448, 384, 320, 256, 192, 128),
                 upChannel_out=(384, 320, 256, 192, 128, 64),
                 deconvChannel2_in=(768, 640, 512, 384, 256, 128),
                 deconvChannel2_out=(384, 320, 256, 192, 128, 64),
                 upKernel=((3, 3), (3, 3), (3, 3), (3, 3), (3, 5), (3, 3)),
                 upPadding=(1, 1, 1, 1, (1, 2), 1), finalIn=64, finalOut=2):
        super().__init__()
        self.channelLength = upChannel_in
        self.deConvList = nn.ModuleList([
            nn.Conv2d(deconvChannel2_in[i], deconvChannel2_out[i], 3, padding=1)
            for i in range(len(upChannel_in))
        ])
        self.batch = nn.ModuleList([nn.BatchNorm2d(c) for c in deconvChannel2_out])
        self.active = nn.LeakyReLU()
        ### Upsampling block preparation ###
        self.upConv = nn.ModuleList([
            nn.Conv2d(upChannel_in[i], upChannel_out[i], kernel_size=upKernel[i],
                      padding=upPadding[i], padding_mode="replicate")
            for i in range(len(upChannel_out))
        ])
        self.upBatch = nn.ModuleList([nn.BatchNorm2d(c) for c in upChannel_out])
        self.upActive = nn.LeakyReLU()
        # BUG FIX: residual blocks are now built once here (one per stage, channel
        # count matching that stage's decoder output) instead of being re-created
        # inside forward() — the in-forward construction produced unregistered,
        # randomly re-initialised parameters on every call that the optimizer never
        # saw. The old unused `self.residual = Residual()` is dropped in its favour.
        self.resList = nn.ModuleList([Residual(c) for c in deconvChannel2_out])
        ### Last convolution layer ###
        self.lastConv = nn.Conv2d(finalIn, finalOut, 3, padding=1, padding_mode="replicate")

    def interpolation(self, inputT, targetT):
        """Bilinearly resize inputT to targetT's spatial dimensions (H, W)."""
        # Removed four unused channel/size locals from the original.
        return nn.functional.interpolate(
            inputT, size=(targetT.shape[2], targetT.shape[3]), mode="bilinear")

    def forward(self, out, enList):
        enList = enList[::-1]  # reverse so the deepest encoder skip comes first
        for l in range(len(self.channelLength)):
            ### Upscaling = interpolation + convolution ###
            out = self.interpolation(out, enList[l])
            out = self.upConv[l](out)
            out = self.upBatch[l](out)
            out = self.upActive(out)
            ### Concatenation on the channel dimension ###
            out = torch.cat([out, enList[l]], 1)
            ### Decoding ###
            out = self.deConvList[l](out)
            out = self.batch[l](out)
            out = self.active(out)
            ### Residual block (registered, trainable — see __init__) ###
            out = self.resList[l](out)
        # Final convolution, applied once after all stages.
        return self.lastConv(out)
class DeepTest(nn.Module):
    """Full super-resolution model: input stage -> upscale -> encoder -> decoder,
    with the input stage's bicubic interpolation added to the decoder output as a
    global skip connection."""

    def __init__(self):
        super().__init__()
        self.input = Input()
        self.upscale = Upscale()
        self.encoder = Encoding()
        self.decoder = Decoding()

    def forward(self, lowRes):
        # Run the pipeline stage by stage, keeping the bicubic branch aside.
        features, bicubic = self.input(lowRes)
        features = self.upscale(features)
        encoded, skips = self.encoder(features)
        decoded = self.decoder(encoded, skips)
        return decoded + bicubic
And I use the following code snippet to initiate the training process:
import datetime

from torch import optim

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
torch.cuda.empty_cache()

n_epochs = 100
loss_fn = torch.nn.MSELoss()
model = DeepTest().to(device)
lrate = 1e-3
# BUG FIX: `lrate` was defined but never passed to the optimizer, so Adam silently
# used its built-in default learning rate. Pass it explicitly.
optimizer = optim.Adam(model.parameters(), lr=lrate, weight_decay=1e-4)
train_loader = torch.utils.data.DataLoader(outList, batch_size=20, shuffle=True, num_workers=0)

for epoch in range(1, n_epochs + 1):
    loss_train = 0.0
    for img, label in train_loader:
        # NOTE(review): the dataset holds raw numpy arrays; the default collate
        # turns float64 arrays into float64 tensors, which would not match the
        # model's float32 parameters — hence the .float() casts. Confirm against
        # the actual dtype of `outList`.
        img = img.float().to(device)
        label = label.float().to(device)
        output = model(img)  # here label serves as the training target
        loss = loss_fn(output, label)
        # Standard step: clear stale gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_train += loss.item()
    # Report the mean per-batch loss on the first epoch and every tenth one.
    if epoch == 1 or epoch % 10 == 0:
        print('{} Epoch {}, Training loss {}'.format(
            datetime.datetime.now(), epoch,
            loss_train / len(train_loader)))