Pruned network is slower than original network

Hi all,
I am doing face detection with MTCNN. I have the ‘original’ network and a ‘pruned’ network (produced by structured pruning, so it is definitely smaller).

The issue I’m having is that I’m not seeing any speed-up from the pruned network during inference; in the timings below it is actually slower than the original. I am simply running a few images at a time on my CPU. The network structures are below; has anybody encountered anything similar?

Pruned (execution time: 1.0987770557403564)

<bound method Module.parameters of MTCNN(
  (pnet): PNet(
    (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1))
    (prelu1): PReLU(num_parameters=8)
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv2): Conv2d(8, 7, kernel_size=(3, 3), stride=(1, 1))
    (prelu2): PReLU(num_parameters=7)
    (conv3): Conv2d(7, 13, kernel_size=(3, 3), stride=(1, 1))
    (prelu3): PReLU(num_parameters=13)
    (conv4_1): Conv2d(13, 2, kernel_size=(1, 1), stride=(1, 1))
    (softmax4_1): Softmax(dim=1)
    (conv4_2): Conv2d(13, 4, kernel_size=(1, 1), stride=(1, 1))
  )
  (rnet): RNet(
    (conv1): Conv2d(3, 11, kernel_size=(3, 3), stride=(1, 1))
    (prelu1): PReLU(num_parameters=11)
    (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv2): Conv2d(11, 25, kernel_size=(3, 3), stride=(1, 1))
    (prelu2): PReLU(num_parameters=25)
    (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv3): Conv2d(25, 24, kernel_size=(2, 2), stride=(1, 1))
    (prelu3): PReLU(num_parameters=24)
    (dense4): Linear(in_features=216, out_features=128, bias=True)
    (prelu4): PReLU(num_parameters=128)
    (dense5_1): Linear(in_features=128, out_features=2, bias=True)
    (softmax5_1): Softmax(dim=1)
    (dense5_2): Linear(in_features=128, out_features=4, bias=True)
  )
  (onet): ONet(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
    (prelu1): PReLU(num_parameters=32)
    (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (prelu2): PReLU(num_parameters=64)
    (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
    (prelu3): PReLU(num_parameters=64)
    (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv4): Conv2d(64, 128, kernel_size=(2, 2), stride=(1, 1))
    (prelu4): PReLU(num_parameters=128)
    (dense5): Linear(in_features=1152, out_features=256, bias=True)
    (prelu5): PReLU(num_parameters=256)
    (dense6_1): Linear(in_features=256, out_features=2, bias=True)
    (softmax6_1): Softmax(dim=1)
    (dense6_2): Linear(in_features=256, out_features=4, bias=True)
    (dense6_3): Linear(in_features=256, out_features=10, bias=True)
  )
)>

Unpruned (execution time: 0.7370638847351074)

<bound method Module.parameters of MTCNN(
  (pnet): PNet(
    (conv1): Conv2d(3, 10, kernel_size=(3, 3), stride=(1, 1))
    (prelu1): PReLU(num_parameters=10)
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv2): Conv2d(10, 16, kernel_size=(3, 3), stride=(1, 1))
    (prelu2): PReLU(num_parameters=16)
    (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
    (prelu3): PReLU(num_parameters=32)
    (conv4_1): Conv2d(32, 2, kernel_size=(1, 1), stride=(1, 1))
    (softmax4_1): Softmax(dim=1)
    (conv4_2): Conv2d(32, 4, kernel_size=(1, 1), stride=(1, 1))
  )
  (rnet): RNet(
    (conv1): Conv2d(3, 28, kernel_size=(3, 3), stride=(1, 1))
    (prelu1): PReLU(num_parameters=28)
    (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv2): Conv2d(28, 48, kernel_size=(3, 3), stride=(1, 1))
    (prelu2): PReLU(num_parameters=48)
    (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv3): Conv2d(48, 64, kernel_size=(2, 2), stride=(1, 1))
    (prelu3): PReLU(num_parameters=64)
    (dense4): Linear(in_features=576, out_features=128, bias=True)
    (prelu4): PReLU(num_parameters=128)
    (dense5_1): Linear(in_features=128, out_features=2, bias=True)
    (softmax5_1): Softmax(dim=1)
    (dense5_2): Linear(in_features=128, out_features=4, bias=True)
  )
  (onet): ONet(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
    (prelu1): PReLU(num_parameters=32)
    (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (prelu2): PReLU(num_parameters=64)
    (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
    (prelu3): PReLU(num_parameters=64)
    (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv4): Conv2d(64, 128, kernel_size=(2, 2), stride=(1, 1))
    (prelu4): PReLU(num_parameters=128)
    (dense5): Linear(in_features=1152, out_features=256, bias=True)
    (prelu5): PReLU(num_parameters=256)
    (dense6_1): Linear(in_features=256, out_features=2, bias=True)
    (softmax6_1): Softmax(dim=1)
    (dense6_2): Linear(in_features=256, out_features=4, bias=True)
    (dense6_3): Linear(in_features=256, out_features=10, bias=True)
  )
)>