Weird output of forward hook when using multiple GPUs

Hi,
I’m trying to get intermediate outputs using forward hooks. However, with multiple GPUs this does not work: each GPU receives a fraction of the input, so the results coming from the different GPUs have to be aggregated. data_parallel seemed to meet my need (a rough sketch of what it does on each call is included below), and I wrote a simple test program. But weird things happen.
I feed 3 inputs to a vgg19 model in turn: the first is all -1, the second is all zeros, and the third is all 1.

  • For the first input, the output is None (in my tests, whatever the first input is, the output is None).
  • For the second input, the output is not None, but it is not all zeros either (in fact, when you feed an all-zeros tensor to a net with ReLU, the intermediate output is naturally all zeros). Looking closely, this output actually corresponds to the first input.
  • For the third input, the output corresponds to the second input and is all zeros.

Am I missing something important or writing problematic code? Please let me know.
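
For reference, here is my rough understanding of what torch.nn.parallel.data_parallel does on every call (a simplified sketch using the public replicate/scatter/parallel_apply/gather helpers; the real implementation also handles kwargs, output devices, and edge cases):

import torch
from torch.nn.parallel import replicate, scatter, parallel_apply, gather

def data_parallel_sketch(module, input, device_ids):
    # split the batch along dim 0, one chunk per GPU
    inputs = scatter(input, device_ids)
    # copy the module (with its submodules and hooks) onto each GPU
    replicas = replicate(module, device_ids[:len(inputs)])
    # run every replica on its own chunk, one thread per GPU
    outputs = parallel_apply(replicas, inputs)
    # concatenate the per-GPU outputs on the first device
    return gather(outputs, device_ids[0])

If this is roughly right, the forward runs on per-call replicas of my wrapper rather than on the wrapper object I constructed, which might be relevant to what I am seeing.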

import torch
from torchvision.models.vgg import vgg19

class Wrapper(torch.nn.Module):
    def __init__(self, model):
        super(Wrapper, self).__init__()
        self.model = model
        self.target_outputs = None

        def forward_hook(_, __, output):
            # store the intermediate output on the wrapper instance
            self.target_outputs = output.detach()
        # hook an early layer of the vgg19 feature extractor
        self.model.features[2].register_forward_hook(forward_hook)

    def forward(self, input):
        self.model(input)
        return self.target_outputs


model = vgg19()
model = model.cuda(4)
wrapper = Wrapper(model)

devices = [4, 5]

input1 = torch.randn(60,3,224,224).fill_(-1).cuda(4)
out1 = torch.nn.parallel.data_parallel(wrapper, input1, devices)
print(out1)
input2 = torch.randn(60,3,224,224).fill_(0).cuda(4)
out2 = torch.nn.parallel.data_parallel(wrapper, input2, devices)
print(out2.shape)
input3 = torch.randn(60,3,224,224).fill_(1).cuda(4)
out3 = torch.nn.parallel.data_parallel(wrapper, input3, devices)
print(out3.shape)

output:

None
(60, 64, 224, 224)
(60, 64, 224, 224)

I have tried several ways to implement this in multi-GPU mode. One of them is not so elegant but works and returns the right intermediate feature map; another one does not work and returns None and then a wrong result.
I guess the problem is related to the assignment self.target_outputs = output.detach() in the hook function: which variable the forward output is assigned to matters (a small probe to check this is sketched after the test code below).

# the one that works
class Wrapper(torch.nn.Module):
    def __init__(self, model):
        super(Wrapper, self).__init__()
        self.model = model

        def f_hook(module, __, output):
            # store the hook output as a buffer on the hooked submodule itself
            module.register_buffer('target_outputs', output)
        self.model.features[2].register_forward_hook(f_hook)

    def forward(self, input):
        self.model(input)
        return self.model.features[2].target_outputs

    def __repr__(self):
        return "Wrapper"


# another one that does not work. The implementation and the result are the same as in the question above.
class Wrapper1(torch.nn.Module):
    def __init__(self, model):
        super(Wrapper1, self).__init__()
        self.model = model
        self.target_outputs = None

        def f_hook(_, __, output):
            # store the hook output on the wrapper instance captured by the closure
            self.target_outputs = output.detach()
        self.model.features[2].register_forward_hook(f_hook)

    def forward(self, input):
        self.model(input)
        return self.target_outputs

    def __repr__(self):
        return "Wrapper1"

# test code
if __name__ == '__main__':
    devices = [4,5]
  
    model = vgg19().cuda(4)
    wrapper = Wrapper(model)
    wrapper = wrapper.cuda(4)
    input1 = torch.randn(60,3,224,224).fill_(0).cuda(4)  
    out1 = torch.nn.parallel.data_parallel(wrapper, input1, devices)
    print(out1) if out1 is not None else None
    # prints a correct feature map
    input2 = torch.randn(60,3,224,224).fill_(1).cuda(4)  
    out2 = torch.nn.parallel.data_parallel(wrapper, input2, devices)
    print(out2) if out2 is not None else None
    # prints a correct feature map

    model = vgg19().cuda(4)
    wrapper = Wrapper1(model)
    wrapper = wrapper.cuda(4)
    input1 = torch.randn(60,3,224,224).fill_(0).cuda(4)
    out1 = torch.nn.parallel.data_parallel(wrapper, input1, devices)
    print(out1) if out1 is not None else None
    # prints None
    input2 = torch.randn(60,3,224,224).fill_(1).cuda(4)  
    out2 = torch.nn.parallel.data_parallel(wrapper, input2, devices)
    print(out2) if out2 is not None else None
    # prints a wrong feature map, which corresponds to the output for input1 rather than input2.
    # This is what confuses me all the time.
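
To narrow this down further, one probe I could add is to record which object the hook writes to versus which object forward reads from, since data_parallel replicates the wrapper on every call and those may not be the same object. A minimal sketch, reusing the imports above (ProbedWrapper and the prints are hypothetical probe code, not something I have verified):

class ProbedWrapper(torch.nn.Module):
    def __init__(self, model):
        super(ProbedWrapper, self).__init__()
        self.model = model
        self.target_outputs = None

        def f_hook(_, __, output):
            # 'self' here is whatever instance the closure captured at __init__ time
            print('hook writes to object', id(self))
            self.target_outputs = output.detach()
        self.model.features[2].register_forward_hook(f_hook)

    def forward(self, input):
        # under data_parallel this forward runs on a per-call replica of the wrapper
        print('forward reads from object', id(self))
        self.model(input)
        return self.target_outputs

If the two ids differ during the parallel calls, the hook may be writing to the original wrapper while each replica's forward reads back a stale copy of target_outputs, which would match the one-call lag above.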

Need help! Could someone explain why this happens? Thanks in advance.
