I have a neural network in which the neurons of a single layer (layer 2 in the example below) are arranged into n “blocks”. I want to forward-propagate through each block individually to obtain n outputs. At present I do this with a ModuleList and a for loop over the individual blocks, but the code is terribly slow because of this. What is a more efficient way to do this?

Any help would be appreciated, thanks a lot!

```
class small_net(nn.Module):
    """Small network whose second layer is split into independent channel blocks.

    Layer 1 produces 50 channels. Those channels are partitioned into
    consecutive blocks whose widths come from ``fixed_block_size(50, 10)``.
    Each block has its own ``BinarizeConv2d`` (-> 100 channels) and its own
    ``BatchNorm2d``; the per-block results are averaged into a single
    100-channel feature map that feeds the two fully connected layers.

    NOTE(review): ``fc3`` expects ``100 * 16 * 16`` features after one 2x2
    max-pool, which presumably means 3x32x32 inputs (assuming
    ``BinarizeConv2d`` preserves spatial size like ``nn.Conv2d`` with
    ``kernel_size=3, padding=1``) -- confirm against the data pipeline.

    Args:
        num_classes: Number of output units of the final linear layer.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        # Layer 1: 3 -> 50 channels.
        self.bCov1 = BinarizeConv2d(3, 50, kernel_size=3, stride=1, padding=1, bias=True)
        self.bn1 = nn.BatchNorm2d(50, affine=False)
        # Not called in forward() (SignActivation is applied instead); kept so
        # the module's attribute set stays the same.
        self.htan1 = nn.Hardtanh(inplace=True)

        # Layer 2: one conv + one batch-norm per channel block.
        self.layer2_neuron_blocks = fixed_block_size(50, 10)
        self.layer2_convs = nn.ModuleList(
            [
                BinarizeConv2d(width, 100, kernel_size=3, padding=1, bias=True)
                for width in self.layer2_neuron_blocks
            ]
        )
        self.mp2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer2_bns = nn.ModuleList(
            [nn.BatchNorm2d(100, affine=False) for _ in self.layer2_neuron_blocks]
        )

        # Classifier head.
        self.fc3 = BinarizeLinear(100 * 16 * 16, 20, bias=True)
        self.bn3 = nn.BatchNorm1d(20, affine=False)
        self.htan3 = nn.Hardtanh(inplace=True)
        self.fc4 = BinarizeLinear(20, num_classes, bias=True)
        self.bn4 = nn.BatchNorm1d(num_classes, affine=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch and return the output of ``bn4``.

        Args:
            x: Input batch with 3 channels (see the class-level note on the
                assumed spatial size).

        Returns:
            Tensor with one row per sample and ``num_classes`` columns,
            located on the same device as ``x``.
        """
        x = self.bCov1(x)
        x = self.bn1(x)
        x = SignActivation.apply(x)

        # Collect the per-block outputs in a list and stack them afterwards.
        # Everything therefore stays on the device (and dtype) of the input:
        # no CPU staging buffer, no host<->device copy per block, no reliance
        # on a global `device`, and no spatial size hard-coded in the loop.
        block_outputs = []
        start = 0
        for width, conv, bn in zip(
            self.layer2_neuron_blocks, self.layer2_convs, self.layer2_bns
        ):
            stop = start + width
            block = x[:, start:stop, :, :]
            block = conv(block)
            block = self.mp2(block)
            block = bn(block)
            block_outputs.append(SignActivation.apply(block))
            start = stop

        # Average over the block axis: (batch, n_blocks, 100, H, W) -> (batch, 100, H, W).
        x = torch.stack(block_outputs, dim=1).mean(dim=1)

        # Explicit batch size so an unexpected spatial size raises here instead
        # of being silently folded into the batch dimension.
        x = x.view(x.shape[0], 100 * 16 * 16)
        x = self.fc3(x)
        x = self.bn3(x)
        x = self.htan3(x)
        x = self.fc4(x)
        x = self.bn4(x)
        return x
```