Hi I’m trying to train an optical flow cnn-model however my model doesn’t train:
here is my model :
class Network_piv(torch.nn.Module):
def init(self):
super(Network_piv,self).__init__()
class Features(torch.nn.Module):
def __init__(self):
super(Features,self).__init__()
self.netOne = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=3, out_channels=32, kernel_size=7, stride=1, padding=3),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1)
)
self.netTwo = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1)
)
self.netThr = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(2,5), stride=1, padding=2),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1)
)
self.netFou = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=64, out_channels=96, kernel_size=3, stride=2, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=96, out_channels=96, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1)
)
self.netFiv = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=96, out_channels=128, kernel_size=3, stride=2, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1)
)
self.netSix = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=128, out_channels=192, kernel_size=3, stride=2, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1)
)
# end
def forward(self, tenInput):
tenOne = self.netOne(tenInput)
tenTwo = self.netTwo(tenOne)
tenThr = self.netThr(tenTwo)
tenFou = self.netFou(tenThr)
tenFiv = self.netFiv(tenFou)
tenSix = self.netSix(tenFiv)
return [ tenOne, tenTwo, tenThr, tenFou, tenFiv, tenSix ]
# end
# end
class Matching(torch.nn.Module):
def __init__(self,intLevel):
super(Matching,self).__init__()
self.fltBackwarp = [0.0, 0.0, 10.0, 5.0, 2.5, 1.25, 0.625][intLevel]
if intLevel != 2:
self.netFeat = torch.nn.Sequential()
elif intLevel == 2:
self.netFeat = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=32,out_channels=64,kernel_size=1,stride=1,padding=0),
torch.nn.LeakyReLU(inplace=False,negative_slope=0.1)
)
self.netFeat = torch.nn.Sequential()
if intLevel == 6:
self.netUpflow = None
elif intLevel != 6:
self.netUpflow = torch.nn.ConvTranspose2d(in_channels=2, out_channels=2, kernel_size=4, stride=2, padding=1, bias=False, groups=2)
# end
if intLevel >= 4:
self.netUpcorr = None
elif intLevel < 4:
self.netUpcorr = torch.nn.ConvTranspose2d(in_channels=49, out_channels=49, kernel_size=4, stride=2, padding=1, bias=False, groups=49)
# end
self.netMain = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=49, out_channels=128, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=32, out_channels=2, kernel_size=[ 0, 0, 7, 5, 5, 3, 3 ][intLevel], stride=1, padding=[ 0, 0, 3, 2, 2, 1, 1 ][intLevel])
)
# end
def forward(self, tenFirst, tenSecond, tenFeaturesFirst, tenFeaturesSecond, tenFlow):
tenFeaturesFirst = self.netFeat(tenFeaturesFirst)
tenFeaturesSecond = self.netFeat(tenFeaturesSecond)
if tenFlow is not None:
tenFlow = self.netUpflow(tenFlow)
# end
if tenFlow is not None:
tenFeaturesSecond = backwarp(tenInput=tenFeaturesSecond, tenFlow=tenFlow * self.fltBackwarp)
# end
if self.netUpcorr is None:
tenCorrelation = torch.nn.functional.leaky_relu(input=correlation.FunctionCorrelation(tenFirst=tenFeaturesFirst, tenSecond=tenFeaturesSecond, intStride=1), negative_slope=0.1, inplace=False)
elif self.netUpcorr is not None:
tenCorrelation = self.netUpcorr(torch.nn.functional.leaky_relu(input=correlation.FunctionCorrelation(tenFirst=tenFeaturesFirst, tenSecond=tenFeaturesSecond, intStride=2), negative_slope=0.1, inplace=False))
# end
return (tenFlow if tenFlow is not None else 0.0) + self.netMain(tenCorrelation)
# end
# end
class Subpixel(torch.nn.Module):
def __init__(self, intLevel):
super(Subpixel, self).__init__()
self.fltBackward = [ 0.0, 0.0, 10.0, 5.0, 2.5, 1.25, 0.625 ][intLevel]
if intLevel != 2:
self.netFeat = torch.nn.Sequential()
elif intLevel == 2:
self.netFeat = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=1, stride=1, padding=0),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1)
)
# end
self.netMain = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=[ 0, 0, 130, 130, 194, 258, 386 ][intLevel], out_channels=128, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=32, out_channels=2, kernel_size=[ 0, 0, 7, 5, 5, 3, 3 ][intLevel], stride=1, padding=[ 0, 0, 3, 2, 2, 1, 1 ][intLevel])
)
# end
def forward(self, tenFirst, tenSecond, tenFeaturesFirst, tenFeaturesSecond, tenFlow):
tenFeaturesFirst = self.netFeat(tenFeaturesFirst)
tenFeaturesSecond = self.netFeat(tenFeaturesSecond)
if tenFlow is not None:
tenFeaturesSecond = backwarp(tenInput=tenFeaturesSecond, tenFlow=tenFlow * self.fltBackward)
# end
return (tenFlow if tenFlow is not None else 0.0) + self.netMain(torch.cat([ tenFeaturesFirst, tenFeaturesSecond, tenFlow ], 1))
# end
# end
class Regularization(torch.nn.Module):
def __init__(self, intLevel):
super(Regularization, self).__init__()
self.fltBackward = [ 0.0, 0.0, 10.0, 5.0, 2.5, 1.25, 0.625 ][intLevel]
self.intUnfold = [ 0, 0, 7, 5, 5, 3, 3 ][intLevel]
if intLevel >= 5:
self.netFeat = torch.nn.Sequential()
elif intLevel < 5:
self.netFeat = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=[ 0, 0, 32, 64, 96, 128, 192 ][intLevel], out_channels=128, kernel_size=1, stride=1, padding=0),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1)
)
# end
self.netMain = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=[ 0, 0, 131, 131, 131, 131, 195 ][intLevel], out_channels=128, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1),
torch.nn.LeakyReLU(inplace=False, negative_slope=0.1)
)
if intLevel >= 5:
self.netDist = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=32, out_channels=[ 0, 0, 49, 25, 25, 9, 9 ][intLevel], kernel_size=[ 0, 0, 7, 5, 5, 3, 3 ][intLevel], stride=1, padding=[ 0, 0, 3, 2, 2, 1, 1 ][intLevel])
)
elif intLevel < 5:
self.netDist = torch.nn.Sequential(
torch.nn.Conv2d(in_channels=32, out_channels=[ 0, 0, 49, 25, 25, 9, 9 ][intLevel], kernel_size=([ 0, 0, 7, 5, 5, 3, 3 ][intLevel], 1), stride=1, padding=([ 0, 0, 3, 2, 2, 1, 1 ][intLevel], 0)),
torch.nn.Conv2d(in_channels=[ 0, 0, 49, 25, 25, 9, 9 ][intLevel], out_channels=[ 0, 0, 49, 25, 25, 9, 9 ][intLevel], kernel_size=(1, [ 0, 0, 7, 5, 5, 3, 3 ][intLevel]), stride=1, padding=(0, [ 0, 0, 3, 2, 2, 1, 1 ][intLevel]))
)
# end
self.netScaleX = torch.nn.Conv2d(in_channels=[ 0, 0, 49, 25, 25, 9, 9 ][intLevel], out_channels=1, kernel_size=1, stride=1, padding=0)
self.netScaleY = torch.nn.Conv2d(in_channels=[ 0, 0, 49, 25, 25, 9, 9 ][intLevel], out_channels=1, kernel_size=1, stride=1, padding=0)
# eny
def forward(self, tenFirst, tenSecond, tenFeaturesFirst, tenFeaturesSecond, tenFlow):
tenDifference = (tenFirst - backwarp(tenInput=tenSecond, tenFlow=tenFlow * self.fltBackward)).pow(2.0).sum(1, True).sqrt().detach()
tenDist = self.netDist(self.netMain(torch.cat([ tenDifference, tenFlow - tenFlow.view(tenFlow.shape[0], 2, -1).mean(2, True).view(tenFlow.shape[0], 2, 1, 1), self.netFeat(tenFeaturesFirst) ], 1)))
tenDist = tenDist.pow(2.0).neg()
tenDist = (tenDist - tenDist.max(1, True)[0]).exp()
tenDivisor = tenDist.sum(1, True).reciprocal()
tenScaleX = self.netScaleX(tenDist * torch.nn.functional.unfold(input=tenFlow[:, 0:1, :, :], kernel_size=self.intUnfold, stride=1, padding=int((self.intUnfold - 1) / 2)).view_as(tenDist)) * tenDivisor
tenScaleY = self.netScaleY(tenDist * torch.nn.functional.unfold(input=tenFlow[:, 1:2, :, :], kernel_size=self.intUnfold, stride=1, padding=int((self.intUnfold - 1) / 2)).view_as(tenDist)) * tenDivisor
return torch.cat([ tenScaleX, tenScaleY ], 1)
# end
# end
self.netFeatures = Features()
self.netMatching = torch.nn.ModuleList([ Matching(intLevel) for intLevel in [ 2, 3, 4, 5, 6 ] ])
self.netSubpixel = torch.nn.ModuleList([ Subpixel(intLevel) for intLevel in [ 2, 3, 4, 5, 6 ] ])
self.netRegularization = torch.nn.ModuleList([ Regularization(intLevel) for intLevel in [ 2, 3, 4, 5, 6 ] ])
def forward(self, tenFirst, tenSecond):
tenFirst[:, 0, :, :] = tenFirst[:, 0, :, :] - 0.411618
tenFirst[:, 1, :, :] = tenFirst[:, 1, :, :] - 0.434631
tenFirst[:, 2, :, :] = tenFirst[:, 2, :, :] - 0.454253
tenSecond[:, 0, :, :] = tenSecond[:, 0, :, :] - 0.410782
tenSecond[:, 1, :, :] = tenSecond[:, 1, :, :] - 0.433645
tenSecond[:, 2, :, :] = tenSecond[:, 2, :, :] - 0.452793
'''
tenFirst = [ tenFirst ]
tenSecond = [ tenSecond ]
'''
tenFeaturesFirst = self.netFeatures(tenFirst)
tenFeaturesSecond = self.netFeatures(tenSecond)
tenFirst = [ tenFirst ]
tenSecond = [ tenSecond ]
for intLevel in [ 1, 2, 3, 4, 5 ]:
tenFirst.append(torch.nn.functional.interpolate(input=tenFirst[-1], size=(tenFeaturesFirst[intLevel].shape[2], tenFeaturesFirst[intLevel].shape[3]), mode='bilinear', align_corners=False))
tenSecond.append(torch.nn.functional.interpolate(input=tenSecond[-1], size=(tenFeaturesSecond[intLevel].shape[2], tenFeaturesSecond[intLevel].shape[3]), mode='bilinear', align_corners=False))
# end
tenFlow = None
for intLevel in [ -1, -2, -3, -4, -5,]:
if intLevel == -5:
tenFlow = tenFlow[:,:,0:109,:]
tenFlow = self.netMatching[intLevel](tenFirst[intLevel], tenSecond[intLevel], tenFeaturesFirst[intLevel], tenFeaturesSecond[intLevel], tenFlow)
tenFlow = self.netSubpixel[intLevel](tenFirst[intLevel], tenSecond[intLevel], tenFeaturesFirst[intLevel], tenFeaturesSecond[intLevel], tenFlow)
tenFlow = self.netRegularization[intLevel](tenFirst[intLevel], tenSecond[intLevel], tenFeaturesFirst[intLevel], tenFeaturesSecond[intLevel], tenFlow)
# end
tenFlow = self.netMatching[0](tenFirst[0], tenSecond[0], tenFeaturesFirst[0], tenFeaturesSecond[0], tenFlow)
tenFlow = self.netSubpixel[0](tenFirst[0], tenSecond[0], tenFeaturesFirst[0], tenFeaturesSecond[0], tenFlow)
#tenFlow = self.netRegularization[0](tenFirst[0], tenSecond[0], tenFeaturesFirst[0], tenFeaturesSecond[0], tenFlow)
return tenFlow * 20.0
end
end
and here is my training :
import torch.optim as optim
from tqdm import tqdm
from PIL import Image
import PIL
import flowiz as fz
import torch.nn.functional as F
optimizer = optim.Adam(model.parameters(),lr=0.0001)
loss_function = torch.nn.MSELoss().cuda()
#target = Variable(target.cuda(), requires_grad=False)
batch_size = 8
epochs = 10
for epo in range(epochs):
model.train()
for i in tqdm(range(0,len(X_train_1),batch_size)):
model.train()
batch_X_1 = torch.FloatTensor(X_train_1[i:i+batch_size]).view(-1,3,436,1024)
batch_X_2 = torch.FloatTensor(X_train_2[i:i+batch_size]).view(-1,3,436,1024)
batch_y = torch.FloatTensor(Y_train[i:i+batch_size]).cuda()
model.zero_grad()
output = model(batch_X_1.cuda(),batch_X_2.cuda())
output = computeImg(output[0,:,:].view((436,1024,2)).cpu().numpy())
plt.imshow(output)
plt.show()
plt.imshow(Y_train[i])
plt.show()
# creating a image object (main image)
'''
output = Image.save('./output/output_%d.flo',i)
files = glob.glob('output.flo')
output = img = fz.convert_from_file(output)
'''
output = torch.FloatTensor(output).cuda()
#batch_y = batch_y.requires_grad=False
loss = loss_function(output,batch_y)
loss.requires_grad = True
loss.backward()
optimizer.step()
print(loss)