Multi-GPU use not working with modules & extensions

I’m currently trying to run the following code:

import torch
from torch.autograd import Function
from torch.autograd.gradcheck import gradcheck
from numpy import flip
import numpy as np
from scipy.signal import convolve2d, correlate2d
import torch.nn as nn
#from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter

class ScipyConv2dFunction(Function):
  @staticmethod
  def forward(ctx, input, filter, bias):
    # detach so we can cast to NumPy
    #input, filter, bias = input.detach(), filter.detach(), bias.detach()
    result = correlate2d(input.cpu().numpy(), filter.cpu().numpy(), mode='valid')
    #result = correlate2d(input.numpy(), filter.numpy(), mode='valid')
    result += bias.cpu().numpy()
    ctx.save_for_backward(input, filter, bias)
    return torch.as_tensor(result, dtype=input.dtype)

  @staticmethod
  def backward(ctx, grad_output):
    grad_output = grad_output.detach()
    input, filter, bias = ctx.saved_tensors
    grad_output = grad_output.cpu().numpy()
    grad_bias = np.sum(grad_output, keepdims=True)
    grad_input = convolve2d(grad_output, filter.cpu().numpy(), mode='full')
    # the previous line can be expressed equivalently as:
    # grad_input = correlate2d(grad_output, flip(flip(filter.numpy(), axis=0), axis=1), mode='full')
    grad_filter = correlate2d(input.cpu().numpy(), grad_output, mode='valid')
    return torch.from_numpy(grad_input).to(device), torch.from_numpy(grad_filter).to(torch.float).to(device), torch.from_numpy(grad_bias).to(torch.float).to(device)
    #return torch.from_numpy(grad_input), torch.from_numpy(grad_filter).to(torch.float).to("cuda:0"), torch.from_numpy(grad_bias).to(torch.float)
    #return torch.from_numpy(grad_input), torch.from_numpy(grad_filter).to(torch.float).to("cpu"), torch.from_numpy(grad_bias).to(torch.float)

class ScipyConv2d(nn.Module):
  def __init__(self, filter_width, filter_height):
    super(ScipyConv2d, self).__init__()
    self.filter = Parameter(torch.randn(filter_width, filter_height))
    self.bias = Parameter(torch.randn(1, 1))
  def forward(self, input):
    return ScipyConv2dFunction.apply(input, self.filter, self.bias)

class DataParallelPassthrough(torch.nn.DataParallel):
  def __getattr__(self, name):
    try:
      return super().__getattr__(name)
    except AttributeError:
      return getattr(self.module, name)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#Check to see if CUDA is there:
print("We have CUDA:")
print(torch.cuda.is_available())

net = ScipyConv2d(3, 3)
net = DataParallelPassthrough(net)
net.to(device)

print("Filter and bias: ", list(net.parameters()))
input = torch.randn(10, 10, requires_grad=True)
input = input.to(device)
output = net(input)
print("Output from the convolution: ", output)
output.backward(torch.randn(8, 8))
print("Gradient for the input map: ",input.grad)
print("The above code is a check. Now let's try something else...")

input = [torch.randn(20, 20, device=device, dtype=torch.double, requires_grad=True)]
output = net(input)
test = gradcheck(moduleConv, input, eps=1e-6, atol=1e-4)
print("Are the gradients correct: ", test)

However, I get the following error:
AttributeError: 'list' object has no attribute 'cpu'
When I fix this error (by just uncommenting the next commented line instead), I get this error:
TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

I'm totally unsure of how to go about this.
Background: I'm currently running on Google Colab with a GPU enabled.
The code above is essentially trying to blend this link and this link together.

Which line of code raises the first error? Could you check the type of the object?

Unrelated to this error, but it seems all operations are performed on the CPU, so I’m not sure why you are pushing the parameters to the GPU. Could you explain your use case a bit?

Here is the updated code with a few more comments and print statements:

import torch
from torch.autograd import Function
from torch.autograd.gradcheck import gradcheck
from numpy import flip
import numpy as np
from scipy.signal import convolve2d, correlate2d
import torch.nn as nn
#from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter

class ScipyConv2dFunction(Function):
  @staticmethod
  def forward(ctx, input, filter, bias):
    # detach so we can cast to NumPy
    print("printing input, filter, bias type before detach")
    print(type(input), type(filter), type(bias))
    input, filter, bias = input.detach(), filter.detach(), bias.detach()
    #result = correlate2d(input.cpu().numpy(), filter.cpu().numpy(), mode='valid')
    print("printing input, filter, bias type after detach")
    print(type(input), type(filter), type(bias))
    result = correlate2d(input.numpy(), filter.numpy(), mode='valid')
    #result += bias.cpu().numpy()
    result += bias.numpy()
    ctx.save_for_backward(input, filter, bias)
    return torch.as_tensor(result, dtype=input.dtype)

  @staticmethod
  def backward(ctx, grad_output):
    grad_output = grad_output.detach()
    input, filter, bias = ctx.saved_tensors
    grad_output = grad_output.cpu().numpy()
    grad_bias = np.sum(grad_output, keepdims=True)
    #grad_input = convolve2d(grad_output, filter.cpu().numpy(), mode='full')
    grad_input = convolve2d(grad_output, filter.numpy(), mode='full')
    # the previous line can be expressed equivalently as:
    # grad_input = correlate2d(grad_output, flip(flip(filter.numpy(), axis=0), axis=1), mode='full')
    #grad_filter = correlate2d(input.cpu().numpy(), grad_output, mode='valid')
    grad_filter = correlate2d(input.numpy(), grad_output, mode='valid')
    return torch.from_numpy(grad_input).to(device), torch.from_numpy(grad_filter).to(torch.float).to(device), torch.from_numpy(grad_bias).to(torch.float).to(device)
    #return torch.from_numpy(grad_input), torch.from_numpy(grad_filter).to(torch.float).to("cuda:0"), torch.from_numpy(grad_bias).to(torch.float)
    #return torch.from_numpy(grad_input), torch.from_numpy(grad_filter).to(torch.float).to("cpu"), torch.from_numpy(grad_bias).to(torch.float)

class ScipyConv2d(nn.Module):
  def __init__(self, filter_width, filter_height):
    super(ScipyConv2d, self).__init__()
    self.filter = Parameter(torch.randn(filter_width, filter_height))
    self.bias = Parameter(torch.randn(1, 1))
  def forward(self, input):
    return ScipyConv2dFunction.apply(input, self.filter, self.bias)

class DataParallelPassthrough(torch.nn.DataParallel):
  def __getattr__(self, name):
    try:
      return super().__getattr__(name)
    except AttributeError:
      return getattr(self.module, name)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#Check to see if CUDA is there:
print("We have CUDA:")
print(torch.cuda.is_available())

net = ScipyConv2d(3, 3)
net = DataParallelPassthrough(net)
net.to(device)

print("Filter and bias: ", list(net.parameters()))
input = torch.randn(10, 10, requires_grad=True)
input = input.to(device)
output = net(input)
print("Output from the convolution: ", output)
output.backward(torch.randn(8, 8))
print("Gradient for the input map: ",input.grad)
print("The above code is a check. Now let's try something else...")

input = [torch.randn(20, 20, device=device, dtype=torch.double, requires_grad=True)]
output = net(input)
test = gradcheck(moduleConv, input, eps=1e-6, atol=1e-4)
print("Are the gradients correct: ", test)

The error traceback reads:

TypeError                                 Traceback (most recent call last)

<ipython-input-...> in <module>()
     68 input = torch.randn(10, 10, requires_grad=True)
     69 input = input.to(device)
---> 70 output = net(input)
     71 print("Output from the convolution: ", output)
     72 output.backward(torch.randn(8, 8))

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/data_parallel.py in forward(self, *inputs, **kwargs)
    151         inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
    152         if len(self.device_ids) == 1:
--> 153             return self.module(*inputs[0], **kwargs[0])
    154         replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
    155         outputs = self.parallel_apply(replicas, inputs, kwargs)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

<ipython-input-...> in forward(self, input)
     47     self.bias = Parameter(torch.randn(1, 1))
     48   def forward(self, input):
---> 49     return ScipyConv2dFunction.apply(input, self.filter, self.bias)
     50 
     51 class DataParallelPassthrough(torch.nn.DataParallel):

<ipython-input-...> in forward(ctx, input, filter, bias)
     19     print("printing input, filter, bias type after detach")
     20     print(type(input), type(filter), type(bias))
---> 21     result = correlate2d(input.numpy(), filter.numpy(), mode='valid')
     22     #result += bias.cpu().numpy()
     23     result += bias.numpy()

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

So line 21 is the problem.

So the use case is the following: I have a NumPy/SciPy function that takes values in and spits values out, doing operations that PyTorch can't do. I'm trying to have PyTorch read the data in and do the ML forward pass/backprop on the Google Colab GPUs. I'm trying to move all of the computation off of the CPU and onto the GPUs, and I was hoping that the DataParallelPassthrough class would take care of the custom modules around the numpy/scipy function; however, it's not working. As a base case (and to reuse code that I know works), I'm just pasting together this and this.

NumPy does not implement GPU support, so all your numpy operations have to work on numpy arrays, which are stored on the CPU.
As the error message points out, input.numpy() won't work if input is a CUDA tensor, so you would have to move it to the CPU first.
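
Something like this is the usual round trip, just as a sketch; x and w below are placeholder tensors standing in for e.g. your input and filter:

import torch
from scipy.signal import correlate2d

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
x = torch.randn(10, 10, device=device)   # placeholder for the input tensor
w = torch.randn(3, 3, device=device)     # placeholder for the filter

# CUDA tensor -> CPU copy -> NumPy array -> SciPy -> tensor back on the original device
y_np = correlate2d(x.cpu().numpy(), w.cpu().numpy(), mode='valid')
y = torch.as_tensor(y_np, dtype=x.dtype).to(x.device)
print(y.shape, y.device)   # torch.Size([8, 8]) cuda:0 if a GPU is available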

I still can't find any actual GPU usage in the posted code, so I'm still unsure why you would like to push the parameters to the GPU at all.

TL;DR: when I try converting my GPU tensors to the CPU (for NumPy), correcting one thrown error just leads to another, and no matter what I do I can't get my code to work.

As per your last post: I see your point, but how do I effectively do this given my setup?
So, given that our base case is:

import torch
from torch.autograd import Function
from numpy import flip
import numpy as np
from scipy.signal import convolve2d, correlate2d
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter

class ScipyConv2dFunction(Function):
    @staticmethod
    def forward(ctx, input, filter, bias):
        # detach so we can cast to NumPy
        input, filter, bias = input.detach(), filter.detach(), bias.detach()
        result = correlate2d(input.numpy(), filter.numpy(), mode='valid')
        result += bias.numpy()
        ctx.save_for_backward(input, filter, bias)
        return torch.as_tensor(result, dtype=input.dtype)

    @staticmethod
    def backward(ctx, grad_output):
        grad_output = grad_output.detach()
        input, filter, bias = ctx.saved_tensors
        grad_output = grad_output.numpy()
        grad_bias = np.sum(grad_output, keepdims=True)
        grad_input = convolve2d(grad_output, filter.numpy(), mode='full')
        # the previous line can be expressed equivalently as:
        # grad_input = correlate2d(grad_output, flip(flip(filter.numpy(), axis=0), axis=1), mode='full')
        grad_filter = correlate2d(input.numpy(), grad_output, mode='valid')
        return torch.from_numpy(grad_input), torch.from_numpy(grad_filter).to(torch.float), torch.from_numpy(grad_bias).to(torch.float)


class ScipyConv2d(Module):
    def __init__(self, filter_width, filter_height):
        super(ScipyConv2d, self).__init__()
        self.filter = Parameter(torch.randn(filter_width, filter_height))
        self.bias = Parameter(torch.randn(1, 1))

    def forward(self, input):
        return ScipyConv2dFunction.apply(input, self.filter, self.bias)

module = ScipyConv2d(3, 3)
print("Filter and bias: ", list(module.parameters()))
input = torch.randn(10, 10, requires_grad=True)
output = module(input)
print("Output from the convolution: ", output)
output.backward(torch.randn(8, 8))
print("Gradient for the input map: ", input.grad)

from torch.autograd.gradcheck import gradcheck

moduleConv = ScipyConv2d(3, 3)

input = [torch.randn(20, 20, dtype=torch.double, requires_grad=True)]
test = gradcheck(moduleConv, input, eps=1e-6, atol=1e-4)
print("Are the gradients correct: ", test)

with an output of:

Filter and bias:  [Parameter containing:
tensor([[ 2.2472,  0.2662,  1.4366],
        [-2.1422, -0.2271, -0.4675],
        [-0.6270,  0.6799, -0.9830]], requires_grad=True), Parameter containing:
tensor([[-0.1968]], requires_grad=True)]
Output from the convolution:  tensor([[ 5.0955, -0.0619, -1.3610, -5.1838, -2.8278, -1.0325,  2.8800,  2.0740],
        [-2.2504, -0.9791,  1.7379,  5.0958,  3.6570,  1.8592,  0.3560, -2.8805],
        [-0.2183, -5.0285,  0.5651, -2.1244, -3.3748, -0.5493,  0.5300,  4.6890],
        [ 0.6642,  1.7947,  4.3560, -1.5376,  3.4807,  0.0310, -2.7650,  0.0496],
        [ 0.3749,  1.4415,  0.1217,  3.3080, -1.5355,  0.4941, -0.7830,  1.4549],
        [-4.8272,  4.3554, -0.2449,  0.9998,  4.5468,  1.6827,  9.1904, -0.2611],
        [ 2.8833, -1.8556,  1.0970, -4.5905, -4.7777, -3.7762, -8.5501, -2.8313],
        [-4.6217, -1.2052, -9.1361, -2.6188, -5.6899,  1.3478, -2.7588,  4.6450]],
       grad_fn=<ScipyConv2dFunctionBackward>)
Gradient for the input map:  tensor([[-1.6839e+00,  3.7211e+00,  1.7345e+00,  5.2193e+00,  1.4691e+00,
          6.8384e-01, -2.2208e+00,  3.3506e-01, -1.0961e+00,  7.0068e-01],
        [ 3.3596e+00, -3.2534e+00,  7.8873e-01, -4.1801e+00,  9.5837e-01,
         -2.5884e-01,  3.3485e-01, -1.0153e+00, -9.0555e-01, -3.4262e-01],
        [-2.3561e+00, -1.5195e+00, -4.7674e+00,  2.8600e+00, -2.8327e+00,
          3.5125e+00,  2.3391e+00,  1.0551e+00,  1.7734e+00, -4.8571e-01],
        [-9.2608e-01, -2.6734e+00,  5.3447e+00, -9.1576e+00,  9.7195e-01,
         -8.7368e+00, -9.4512e-01, -6.1455e+00,  2.8779e-01, -2.0054e+00],
        [-1.2811e+00,  4.2576e+00, -4.4287e+00,  5.7813e+00,  1.4669e+00,
         -1.0919e+00,  4.6554e-01, -5.2452e+00, -1.6991e-01, -3.2144e+00],
        [ 4.9970e+00, -2.8864e+00, -1.7751e+00,  2.6767e+00, -1.9646e+00,
          7.2893e+00, -2.8725e+00,  8.0338e+00, -2.6189e+00,  2.4180e+00],
        [-6.0182e-03, -2.2434e+00,  4.7247e+00, -3.2480e+00, -7.1975e-01,
          1.2095e+00,  1.6506e+00,  3.5161e+00, -1.6777e+00,  1.6149e+00],
        [-2.3076e+00, -1.0580e+00, -1.6062e+00, -5.9103e-01, -6.9412e-01,
         -7.8619e-01, -2.3087e+00, -7.6855e-01,  7.4772e-01, -3.1858e-01],
        [ 9.2450e-01,  4.1310e+00, -1.1323e+00,  1.2421e-01,  2.8312e+00,
         -1.4285e+00,  2.6020e+00,  9.5710e-01, -1.8637e-01,  1.0927e+00],
        [ 3.2821e-01,  6.0729e-01, -7.3003e-01,  1.2852e+00,  1.0680e+00,
         -1.8217e+00,  1.8253e+00, -1.1010e-01, -3.8597e-02,  6.1659e-01]])
Are the gradients correct:  True

Something to note: in this base case I can now print the gradient for the input map, and gradcheck reports that the gradients are correct (True).

Now if I make the corrections to the above code as per the errors in my last post, I get:

import torch
from torch.autograd import Function
from torch.autograd.gradcheck import gradcheck
from numpy import flip
import numpy as np
from scipy.signal import convolve2d, correlate2d
import torch.nn as nn
#from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter

class ScipyConv2dFunction(Function):
  @staticmethod
  def forward(ctx, input, filter, bias):
    # detach so we can cast to NumPy
    print("printing input, filter, bias type before detach")
    print(type(input), type(filter), type(bias))
    #input, filter, bias = input.detach(), filter.detach(), bias.detach()
    print("printing input, filter, bias type after detach")
    print(type(input), type(filter), type(bias))
    #result = correlate2d(input.cpu().numpy(), filter.cpu().numpy(), mode='valid')
    result = correlate2d(input.numpy(), filter.numpy(), mode='valid')
    result += bias.cpu().numpy()
    #result += bias.numpy()
    ctx.save_for_backward(input, filter, bias)
    return torch.as_tensor(result, dtype=input.dtype)

  @staticmethod
  def backward(ctx, grad_output):
    grad_output = grad_output.detach()
    input, filter, bias = ctx.saved_tensors
    grad_output = grad_output.cpu().numpy()
    grad_bias = np.sum(grad_output, keepdims=True)
    grad_input = convolve2d(grad_output, filter.cpu().numpy(), mode='full')
    #grad_input = convolve2d(grad_output, filter.numpy(), mode='full')
    # the previous line can be expressed equivalently as:
    # grad_input = correlate2d(grad_output, flip(flip(filter.numpy(), axis=0), axis=1), mode='full')
    grad_filter = correlate2d(input.cpu().numpy(), grad_output, mode='valid')
    #grad_filter = correlate2d(input.numpy(), grad_output, mode='valid')
    return torch.from_numpy(grad_input).to(device), torch.from_numpy(grad_filter).to(torch.float).to(device), torch.from_numpy(grad_bias).to(torch.float).to(device)
    #return torch.from_numpy(grad_input), torch.from_numpy(grad_filter).to(torch.float).to("cuda:0"), torch.from_numpy(grad_bias).to(torch.float)
    #return torch.from_numpy(grad_input), torch.from_numpy(grad_filter).to(torch.float).to("cpu"), torch.from_numpy(grad_bias).to(torch.float)

class ScipyConv2d(nn.Module):
  def __init__(self, filter_width, filter_height):
    super(ScipyConv2d, self).__init__()
    self.filter = Parameter(torch.randn(filter_width, filter_height))
    self.bias = Parameter(torch.randn(1, 1))
  def forward(self, input):
    return ScipyConv2dFunction.apply(input, self.filter, self.bias)

class DataParallelPassthrough(torch.nn.DataParallel):
  def __getattr__(self, name):
    try:
      return super().__getattr__(name)
    except AttributeError:
      return getattr(self.module, name)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#Check to see if CUDA is there:
print("We have CUDA:")
print(torch.cuda.is_available())

net = ScipyConv2d(3, 3)
net = DataParallelPassthrough(net)
net.to(device)

print("Filter and bias: ", list(net.parameters()))
input = torch.randn(10, 10, requires_grad=True)
input = input.to(device)
output = net(input)
print("Output from the convolution: ", output)
output.backward(torch.randn(8, 8))
print("Gradient for the input map: ",input.grad)
print("The above code is a check. Now let's try something else...")

input = [torch.randn(20, 20, device=device, dtype=torch.double, requires_grad=True)]
output = net(input)
test = gradcheck(moduleConv, input, eps=1e-6, atol=1e-4)
print("Are the gradients correct: ", test)

with an output of:

We have CUDA:
True
Filter and bias:  [Parameter containing:
tensor([[-0.8550, -0.0050, -1.3651],
        [-0.2784,  0.8346,  0.0890],
        [ 1.2413, -0.7320,  0.5601]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([[0.3606]], device='cuda:0', requires_grad=True)]
printing input, filter, bias type before detach
<class 'torch.Tensor'> <class 'torch.nn.parameter.Parameter'> <class 'torch.nn.parameter.Parameter'>
printing input, filter, bias type after detach
<class 'torch.Tensor'> <class 'torch.nn.parameter.Parameter'> <class 'torch.nn.parameter.Parameter'>

with errors of:

TypeError                                 Traceback (most recent call last)

<ipython-input-44-30f0907c14d3> in <module>()
     71 input = torch.randn(10, 10, requires_grad=True)
     72 input = input.to(device)
---> 73 output = net(input)
     74 print("Output from the convolution: ", output)
     75 output.backward(torch.randn(8, 8))

4 frames

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/data_parallel.py in forward(self, *inputs, **kwargs)
    151         inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
    152         if len(self.device_ids) == 1:
--> 153             return self.module(*inputs[0], **kwargs[0])
    154         replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
    155         outputs = self.parallel_apply(replicas, inputs, kwargs)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

<ipython-input-44-30f0907c14d3> in forward(self, input)
     50     self.bias = Parameter(torch.randn(1, 1))
     51   def forward(self, input):
---> 52     return ScipyConv2dFunction.apply(input, self.filter, self.bias)
     53 
     54 class DataParallelPassthrough(torch.nn.DataParallel):

<ipython-input-44-30f0907c14d3> in forward(ctx, input, filter, bias)
     22     #print(type(input), type(filter), type(bias))
     23     #result = correlate2d(input.cpu().numpy(), filter.cpu().numpy(), mode='valid')
---> 24     result = correlate2d(input.numpy(), filter.numpy(), mode='valid')
     25     result += bias.cpu().numpy()
     26     #result += bias.numpy()

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

OK, so all I need to do is change the offending correlate2d line in my code to:

result = correlate2d(input.cpu().numpy(), filter.cpu().numpy(), mode='valid')

but when I do this my output is:

We have CUDA:
True
Filter and bias:  [Parameter containing:
tensor([[ 0.1570, -1.3069, -0.1185],
        [-0.5576, -0.7210, -1.4785],
        [-0.1286, -2.2084, -1.0298]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([[-1.2125]], device='cuda:0', requires_grad=True)]
printing input, filter, bias type before detach
<class 'torch.Tensor'> <class 'torch.nn.parameter.Parameter'> <class 'torch.nn.parameter.Parameter'>
printing input, filter, bias type after detach
<class 'torch.Tensor'> <class 'torch.nn.parameter.Parameter'> <class 'torch.nn.parameter.Parameter'>
Output from the convolution:  tensor([[-4.3066, -1.3450, -3.9924,  1.4882,  3.4400, -1.4760, -2.2422,  0.4295],
        [ 0.9262,  1.0034,  1.8979, -1.1040,  1.4920, -0.5098, -0.0920, -2.5853],
        [ 1.7836,  6.2617, -5.5159, -6.1085, -0.1257, -1.2477, -0.6303,  2.0732],
        [-4.4696, -1.9317, -3.7583, -6.5660, -5.2777, -8.2078, -6.2274, -0.0184],
        [-7.2691, -6.8640, -7.2429, -4.8744, -1.5732, -5.5622, -8.7324, -4.4410],
        [-6.9751, -8.4269, -7.8431, -4.6612, -0.3702, -1.9843, -6.8396, -0.1316],
        [-3.9828, -7.2248, -8.5095, -1.0465,  2.4089, -3.6733, -5.2893, -0.7185],
        [-1.9128,  0.4661, -2.7038, -1.3125, -1.4977, -2.2579, -1.6557,  0.8427]],
       grad_fn=<ScipyConv2dFunctionBackward>)
Gradient for the input map:  None
The above code is a check. Now let's try something else...
printing input, filter, bias type before detach
<class 'list'> <class 'torch.nn.parameter.Parameter'> <class 'torch.nn.parameter.Parameter'>
printing input, filter, bias type after detach
<class 'list'> <class 'torch.nn.parameter.Parameter'> <class 'torch.nn.parameter.Parameter'>

/usr/local/lib/python3.6/dist-packages/torch/tensor.py:746: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the gradient for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations.
  warnings.warn("The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad "

with an error code of:

AttributeError                            Traceback (most recent call last)

<ipython-input-45-d50ce7258182> in <module>()
     75 
     76 input = [torch.randn(20, 20, device=device, dtype=torch.double, requires_grad=True)]
---> 77 output = net(input)
     78 test = gradcheck(moduleConv, input, eps=1e-6, atol=1e-4)
     79 print("Are the gradients correct: ", test)

4 frames

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/data_parallel.py in forward(self, *inputs, **kwargs)
    151         inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
    152         if len(self.device_ids) == 1:
--> 153             return self.module(*inputs[0], **kwargs[0])
    154         replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
    155         outputs = self.parallel_apply(replicas, inputs, kwargs)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

<ipython-input-45-d50ce7258182> in forward(self, input)
     47     self.bias = Parameter(torch.randn(1, 1))
     48   def forward(self, input):
---> 49     return ScipyConv2dFunction.apply(input, self.filter, self.bias)
     50 
     51 class DataParallelPassthrough(torch.nn.DataParallel):

<ipython-input-45-d50ce7258182> in forward(ctx, input, filter, bias)
     18     print("printing input, filter, bias type after detach")
     19     print(type(input), type(filter), type(bias))
---> 20     result = correlate2d(input.cpu().numpy(), filter.cpu().numpy(), mode='valid')
     21     #result = correlate2d(input.numpy(), filter.numpy(), mode='valid')
     22     result += bias.cpu().numpy()

AttributeError: 'list' object has no attribute 'cpu'

So now I'm stuck: if I insert .cpu() I get the list error above, and if I remove .cpu() I'm back to the earlier CUDA tensor error.

Furthermore, I get

Gradient for the input map:  None

which is especially worrisome since, as per my base-case code (at the very top), this should print an actual gradient.
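
The UserWarning in the quoted output hints at what is going on with the None: after input = input.to(device), the name input no longer points at the leaf tensor that requires_grad was set on, so its .grad is not populated. A minimal illustration of just that effect (a sketch, assuming a GPU is available as in the Colab run):

import torch

device = torch.device("cuda:0")                   # assumes CUDA is available
leaf = torch.randn(10, 10, requires_grad=True)    # leaf tensor, created on the CPU
moved = leaf.to(device)                           # a new, non-leaf tensor on the GPU
moved.sum().backward()
print(leaf.grad is None)    # False: the gradient accumulates on the original leaf
print(moved.grad is None)   # True (plus the UserWarning): non-leaf .grad stays empty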

What's the proper way to transfer/move data between the GPU and the CPU in code like the above?
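
For reference, here is a sketch of one way to keep all of the host/device transfers inside the custom Function. It is only a sketch of the pattern, not the exact setup above: it assumes each forward call receives a single 2-D tensor, reads the device off that tensor instead of using a global device variable, and returns the gradients on that same device in backward:

import torch
import numpy as np
from torch.autograd import Function
from torch.nn.parameter import Parameter
from scipy.signal import convolve2d, correlate2d

class ScipyConv2dFunctionSketch(Function):
    @staticmethod
    def forward(ctx, input, filter, bias):
        # detach so we can cast to NumPy; SciPy only ever sees CPU copies
        input, filter, bias = input.detach(), filter.detach(), bias.detach()
        result = correlate2d(input.cpu().numpy(), filter.cpu().numpy(), mode='valid')
        result += bias.cpu().numpy()
        ctx.save_for_backward(input, filter, bias)
        # return the result on whatever device the input came from
        return torch.as_tensor(result, dtype=input.dtype).to(input.device)

    @staticmethod
    def backward(ctx, grad_output):
        input, filter, bias = ctx.saved_tensors
        device = grad_output.device                   # send the gradients back here
        grad_np = grad_output.detach().cpu().numpy()  # CPU copy for SciPy
        grad_bias = np.sum(grad_np, keepdims=True)
        grad_input = convolve2d(grad_np, filter.cpu().numpy(), mode='full')
        grad_filter = correlate2d(input.cpu().numpy(), grad_np, mode='valid')
        return (torch.from_numpy(grad_input).to(grad_output.dtype).to(device),
                torch.from_numpy(grad_filter).to(grad_output.dtype).to(device),
                torch.from_numpy(grad_bias).to(grad_output.dtype).to(device))

class ScipyConv2dSketch(torch.nn.Module):
    def __init__(self, filter_width, filter_height):
        super().__init__()
        self.filter = Parameter(torch.randn(filter_width, filter_height))
        self.bias = Parameter(torch.randn(1, 1))

    def forward(self, input):
        return ScipyConv2dFunctionSketch.apply(input, self.filter, self.bias)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
module = ScipyConv2dSketch(3, 3).to(device)
# create the input directly on the device as a leaf, so input.grad gets populated
input = torch.randn(10, 10, device=device, requires_grad=True)
output = module(input)
output.backward(torch.randn(8, 8, device=device))
print(input.grad.shape)   # torch.Size([10, 10])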