# How can I get broadcasting benefits?

I have a simple custom NN as shown below. It works, but it is very slow — every epoch takes about 27 minutes! How can I vectorize my implementation and take advantage of broadcasting?

``````class MaxSigNet(nn.Module):

def __init__(self, in_channels, out_channels):
super(MaxSigNet, self).__init__()

mywheights=torch.tensor([[[[(0), (15/255.0), (3/255.0), (4/255.0), (0)],
[(37/255.0), (1/255.0), (2/255.0), (4/255.0), (6/255.0)],
[(65/255.0), (128/255.0), (0), (8/255.0), (9/255.0)],
[(68/255.0), (64/255.0), (32/255.0), (16/255.0), (15/255.0)],
[(0), (58/255.0), (37/255.0), (24/255.0), (0)]]]],dtype=torch.float32,requires_grad=True)

mymaps=torch.tensor([[[[0,1,1,1,0],
[1,1,1,1,1],
[1,1,0,1,1],
[1,1,1,1,1],

self.weight=nn.Parameter(torch.mul(mywheights,mymaps))
print("my initial weights are:",self.weight)
self.map=nn.Parameter(mymaps)
self.out_channels=out_channels
self.in_channels=in_channels

def Apply_MaxSig(self,img):
newimgn=torch.zeros_like(imgn)
img_max=imgn.max()

for i in range(2,imgn.shape[0]-2):
for j in range(2,imgn.shape[1]-2):

a=torch.flatten(torch.mul(self.weight,self.map))
b=torch.flatten(imgn[i-2:i+3,j-2:j+3])

siga=torch.sigmoid(a)

sigb=torch.sigmoid(b/img_max)

aa=torch.maximum(siga,sigb)
max5=torch.sum(aa)

newimgn[i,j]=(24*sigb[12] - max5)

def forward(self, input_image):
for i in range(self.out_channels):
for j in range(self.in_channels):

img=input_image[i,j]
img=self.Apply_MaxSig(img)
input_image[i,j]=img

return input_image

class build_simple_net(nn.Module):
    """Minimal network: a single MaxSigNet stage with one in/out channel."""

    def __init__(self):
        super().__init__()
        # The only stage of the network.
        self.s0 = MaxSigNet(1, 1)

    def forward(self, inputs):
        # Delegate straight to the wrapped stage.
        result = self.s0(inputs)
        return result
``````

Hi guys. I solved this — maybe it will be useful for somebody.

``````def forward(self, input_image):
B=torch.nn.functional.unfold(torch.sigmoid(input_image), kernel_size=(5,5), padding=2, stride=1)
B=B.reshape(input_image.shape[0],25,input_image.shape[2] , input_image.shape[3])
C=torch.sigmoid(self.weight).reshape(1,25,1,1)
D=torch.maximum(C,B) * self.map.reshape(1,25,1,1)
E=torch.sum(D,axis=1)
F=24*B[:,12,...] - E
return F.unsqueeze(dim=1)
``````