Strange 'F.conv2d' time?

I define a customized convolution and the forward propogation is as follows:

class PrunedConv2d(nn.Module):

    def __init__(self, in_channels, out_channels, k_in_mask, k_out_mask, output_width, kernel_size, stride=1, padding=0, bias=True):
        super(PrunedConv2d, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size =  _pair(kernel_size)
        self.stride =  _pair(stride)
        self.padding = _pair(padding)

        self.k_in = len(k_in_mask)
        self.k_out = len(k_out_mask)
        self.k_in_mask = Variable(k_in_mask)
        self.k_out_mask = Variable(k_out_mask)
        self.output_width = output_width

        self.weight = nn.Parameter(torch.Tensor(self.k_out, self.k_in, *self.kernel_size))   #32*32*3*3
        self.output = Variable(torch.zeros(1, self.out_channels, self.output_width, self.output_width).cuda())

        if bias:
            self.bias = nn.Parameter(torch.Tensor(self.k_out))
            self.register_parameter('bias', None)
        #count time
        self.squeeze_time = 0
        self.conv_time = 0
        self.expand_time = 0
    def forward(self, input):
        t0 = time.time()
        temp_in = torch.index_select(input, 1, self.k_in_mask)
        t1 = time.time()
        self.squeeze_time = t1 -t0
        t0 = time.time()
        temp_out = F.conv2d(temp_in, self.weight, self.bias, self.stride, self.padding)
        t1 = time.time()
        self.conv_time = t1 -t0
        return temp_out

the ‘’‘self.conv_time’’’ is about 0.0015s
but when I change the return of the forwrad function as follows

 return self.output.index_copy_(1, self.k_out_mask, temp_out)

then the ‘’‘self.conv_time’’’ is about 0.0036s
I don’t know what happens , I feels it is quite strange for me.