# How can I get broadcasting benefits?

I have a simple custom NN as shown below. It works, but it is very slow — every epoch takes about 27 minutes! How can I vectorize my implementation and take advantage of broadcasting?

``````class MaxSigNet(nn.Module):

def __init__(self, in_channels, out_channels):
super(MaxSigNet, self).__init__()

mywheights=torch.tensor([[[[(0), (15/255.0), (3/255.0), (4/255.0), (0)],
[(37/255.0), (1/255.0), (2/255.0), (4/255.0), (6/255.0)],
[(65/255.0), (128/255.0), (0), (8/255.0), (9/255.0)],
[(68/255.0), (64/255.0), (32/255.0), (16/255.0), (15/255.0)],
[(0), (58/255.0), (37/255.0), (24/255.0), (0)]]]],dtype=torch.float32,requires_grad=True)

mymaps=torch.tensor([[[[0,1,1,1,0],
[1,1,1,1,1],
[1,1,0,1,1],
[1,1,1,1,1],

self.weight=nn.Parameter(torch.mul(mywheights,mymaps))
print("my initial weights are:",self.weight)
self.map=nn.Parameter(mymaps)
self.out_channels=out_channels
self.in_channels=in_channels

def Apply_MaxSig(self,img):
newimgn=torch.zeros_like(imgn)
img_max=imgn.max()

for i in range(2,imgn.shape[0]-2):
for j in range(2,imgn.shape[1]-2):

a=torch.flatten(torch.mul(self.weight,self.map))
b=torch.flatten(imgn[i-2:i+3,j-2:j+3])

siga=torch.sigmoid(a)

sigb=torch.sigmoid(b/img_max)

aa=torch.maximum(siga,sigb)
max5=torch.sum(aa)

newimgn[i,j]=(24*sigb[12] - max5)

def forward(self, input_image):
for i in range(self.out_channels):
for j in range(self.in_channels):

img=input_image[i,j]
img=self.Apply_MaxSig(img)
input_image[i,j]=img

return input_image

class build_simple_net(nn.Module):
    """Minimal network: a single MaxSigNet stage with one in/out channel."""

    def __init__(self):
        super().__init__()
        # The only stage of the network.
        self.s0 = MaxSigNet(1, 1)

    def forward(self, inputs):
        # Delegate straight to the wrapped stage.
        result = self.s0(inputs)
        return result
``````

Hi guys. I solved this — maybe it will be useful for somebody.

``````def forward(self, input_image):
B=torch.nn.functional.unfold(torch.sigmoid(input_image), kernel_size=(5,5), padding=2, stride=1)
B=B.reshape(input_image.shape[0],25,input_image.shape[2] , input_image.shape[3])
C=torch.sigmoid(self.weight).reshape(1,25,1,1)
D=torch.maximum(C,B) * self.map.reshape(1,25,1,1)
E=torch.sum(D,axis=1)
F=24*B[:,12,...] - E
return F.unsqueeze(dim=1)
``````