Ineffective Backward Propogation of losses

class ModifiedNet(nn.Module):
    def __init__(self):
        super().__init__() # just run the init of parent class (nn.Module)
        self.conv1 = nn.Conv2d(1, 16, 11, padding = 5)
        self.conv2 = nn.Conv2d(16, 16, 7, padding = 3)
        self.conv3 = nn.Conv2d(16, 16, 5, padding = 2)
        self.conv4 = nn.Conv2d(16, 16, 3, padding = 1)
        self.conv5 = nn.Conv2d(16, 16, 3, padding = 1)
        self.conv6 = nn.Conv2d(16, 1, 1)

    def convs(self, x, mask):
        # max pooling over 2x2
        x = F.relu(self.conv1(x * mask))
        x = x / torch.sum(mask)
        mask = F.max_pool2d(mask, kernel_size=11, stride = 1, padding = 5)
        x = F.relu(self.conv2(x * mask))
        x = x / torch.sum(mask)
        mask = F.max_pool2d(mask, kernel_size=7, stride = 1, padding = 3)
        x = F.relu(self.conv3(x * mask))
        x = x / torch.sum(mask)
        mask = F.max_pool2d(mask, kernel_size=5, stride = 1, padding = 2)
        x = F.relu(self.conv4(x * mask))
        x = x / torch.sum(mask)
        mask = F.max_pool2d(mask, kernel_size=3, stride = 1, padding = 1)
        x = F.relu(self.conv5(x * mask))
        x = x / torch.sum(mask)
        mask = F.max_pool2d(mask, kernel_size=3, stride = 1, padding = 1)
        x = self.conv6(x * mask)
        x = x / torch.sum(mask)
        return x

    def forward(self, x, mask):
        x = self.convs(x, mask)
        return x

I wanted to implement a sparse convolutional neural network for interpolation of depth maps. I implemented the algorithm but the problem which is arising now is since I am normalising the output at end of every layer the the autograd while backpropogating dilutes the error at subsequent layers which is causing very minimum or no change in weights and my network is not able to fit the data the losses being same in each epoch. Please help me with this I am really stuck I checked every forum and I started using pyTorch a week before.