I see your point. But how do I effectively do this given my setup?
So given that our base case is:
‘’’
from numpy import flip
import numpy as np
from scipy.signal import convolve2d, correlate2d
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter
class ScipyConv2dFunction(Function):
    @staticmethod
    def forward(ctx, input, filter, bias):
        # detach so we can cast to NumPy
        input, filter, bias = input.detach(), filter.detach(), bias.detach()
        result = correlate2d(input.numpy(), filter.numpy(), mode='valid')
        result += bias.numpy()
        ctx.save_for_backward(input, filter, bias)
        return torch.as_tensor(result, dtype=input.dtype)
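
    # backward receives the gradient of the loss w.r.t. this Function's output
    # and must return one gradient per forward input: (input, filter, bias).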
    @staticmethod
    def backward(ctx, grad_output):
        grad_output = grad_output.detach()
        input, filter, bias = ctx.saved_tensors
        grad_output = grad_output.numpy()
        grad_bias = np.sum(grad_output, keepdims=True)
        grad_input = convolve2d(grad_output, filter.numpy(), mode='full')
        # the previous line can be expressed equivalently as:
        # grad_input = correlate2d(grad_output, flip(flip(filter.numpy(), axis=0), axis=1), mode='full')
        grad_filter = correlate2d(input.numpy(), grad_output, mode='valid')
        return torch.from_numpy(grad_input), torch.from_numpy(grad_filter).to(torch.float), torch.from_numpy(grad_bias).to(torch.float)
class ScipyConv2d(Module):
    def __init__(self, filter_width, filter_height):
        super(ScipyConv2d, self).__init__()
        self.filter = Parameter(torch.randn(filter_width, filter_height))
        self.bias = Parameter(torch.randn(1, 1))

    def forward(self, input):
        return ScipyConv2dFunction.apply(input, self.filter, self.bias)
module = ScipyConv2d(3, 3)
print("Filter and bias: ", list(module.parameters()))
input = torch.randn(10, 10, requires_grad=True)
output = module(input)
print("Output from the convolution: ", output)
output.backward(torch.randn(8, 8))
print("Gradient for the input map: ", input.grad)
from torch.autograd.gradcheck import gradcheck
moduleConv = ScipyConv2d(3, 3)
input = [torch.randn(20, 20, dtype=torch.double, requires_grad=True)]
test = gradcheck(moduleConv, input, eps=1e-6, atol=1e-4)
print("Are the gradients correct: ", test)
‘’’
with an output of:
‘’’
Filter and bias: [Parameter containing:
tensor([[ 2.2472, 0.2662, 1.4366],
[-2.1422, -0.2271, -0.4675],
[-0.6270, 0.6799, -0.9830]], requires_grad=True), Parameter containing:
tensor([[-0.1968]], requires_grad=True)]
Output from the convolution: tensor([[ 5.0955, -0.0619, -1.3610, -5.1838, -2.8278, -1.0325, 2.8800, 2.0740],
[-2.2504, -0.9791, 1.7379, 5.0958, 3.6570, 1.8592, 0.3560, -2.8805],
[-0.2183, -5.0285, 0.5651, -2.1244, -3.3748, -0.5493, 0.5300, 4.6890],
[ 0.6642, 1.7947, 4.3560, -1.5376, 3.4807, 0.0310, -2.7650, 0.0496],
[ 0.3749, 1.4415, 0.1217, 3.3080, -1.5355, 0.4941, -0.7830, 1.4549],
[-4.8272, 4.3554, -0.2449, 0.9998, 4.5468, 1.6827, 9.1904, -0.2611],
[ 2.8833, -1.8556, 1.0970, -4.5905, -4.7777, -3.7762, -8.5501, -2.8313],
[-4.6217, -1.2052, -9.1361, -2.6188, -5.6899, 1.3478, -2.7588, 4.6450]],
       grad_fn=<ScipyConv2dFunctionBackward>)
Gradient for the input map: tensor([[-1.6839e+00, 3.7211e+00, 1.7345e+00, 5.2193e+00, 1.4691e+00,
6.8384e-01, -2.2208e+00, 3.3506e-01, -1.0961e+00, 7.0068e-01],
[ 3.3596e+00, -3.2534e+00, 7.8873e-01, -4.1801e+00, 9.5837e-01,
-2.5884e-01, 3.3485e-01, -1.0153e+00, -9.0555e-01, -3.4262e-01],
[-2.3561e+00, -1.5195e+00, -4.7674e+00, 2.8600e+00, -2.8327e+00,
3.5125e+00, 2.3391e+00, 1.0551e+00, 1.7734e+00, -4.8571e-01],
[-9.2608e-01, -2.6734e+00, 5.3447e+00, -9.1576e+00, 9.7195e-01,
-8.7368e+00, -9.4512e-01, -6.1455e+00, 2.8779e-01, -2.0054e+00],
[-1.2811e+00, 4.2576e+00, -4.4287e+00, 5.7813e+00, 1.4669e+00,
-1.0919e+00, 4.6554e-01, -5.2452e+00, -1.6991e-01, -3.2144e+00],
[ 4.9970e+00, -2.8864e+00, -1.7751e+00, 2.6767e+00, -1.9646e+00,
7.2893e+00, -2.8725e+00, 8.0338e+00, -2.6189e+00, 2.4180e+00],
[-6.0182e-03, -2.2434e+00, 4.7247e+00, -3.2480e+00, -7.1975e-01,
1.2095e+00, 1.6506e+00, 3.5161e+00, -1.6777e+00, 1.6149e+00],
[-2.3076e+00, -1.0580e+00, -1.6062e+00, -5.9103e-01, -6.9412e-01,
-7.8619e-01, -2.3087e+00, -7.6855e-01, 7.4772e-01, -3.1858e-01],
[ 9.2450e-01, 4.1310e+00, -1.1323e+00, 1.2421e-01, 2.8312e+00,
-1.4285e+00, 2.6020e+00, 9.5710e-01, -1.8637e-01, 1.0927e+00],
[ 3.2821e-01, 6.0729e-01, -7.3003e-01, 1.2852e+00, 1.0680e+00,
-1.8217e+00, 1.8253e+00, -1.1010e-01, -3.8597e-02, 6.1659e-01]])
Are the gradients correct: True
‘’’
Something to note: I can now print the gradient for the input map, and gradcheck reports that the gradients are correct (True).
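As an aside, the flip equivalence noted in the backward comments is easy to sanity-check with throwaway data; a minimal sketch:
```
# Sanity check: 2-D convolution equals 2-D correlation with a doubly-flipped kernel.
import numpy as np
from scipy.signal import convolve2d, correlate2d

a = np.random.randn(8, 8)
k = np.random.randn(3, 3)
lhs = convolve2d(a, k, mode='full')
rhs = correlate2d(a, np.flip(np.flip(k, axis=0), axis=1), mode='full')
print(np.allclose(lhs, rhs))  # prints: True
```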
Now if I make the corrections to the above code as per the errors in my last post, I get:
‘’’
import torch
from torch.autograd import Function
from torch.autograd.gradcheck import gradcheck
from numpy import flip
import numpy as np
from scipy.signal import convolve2d, correlate2d
import torch.nn as nn
#from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter
class ScipyConv2dFunction(Function):
    @staticmethod
    def forward(ctx, input, filter, bias):
        # detach so we can cast to NumPy
        print("printing input, filter, bias type before detach")
        print(type(input), type(filter), type(bias))
        #input, filter, bias = input.detach(), filter.detach(), bias.detach()
        print("printing input, filter, bias type after detach")
        print(type(input), type(filter), type(bias))
        #result = correlate2d(input.cpu().numpy(), filter.cpu().numpy(), mode='valid')
        result = correlate2d(input.numpy(), filter.numpy(), mode='valid')
        result += bias.cpu().numpy()
        #result += bias.numpy()
        ctx.save_for_backward(input, filter, bias)
        return torch.as_tensor(result, dtype=input.dtype)

    @staticmethod
    def backward(ctx, grad_output):
        grad_output = grad_output.detach()
        input, filter, bias = ctx.saved_tensors
        grad_output = grad_output.cpu().numpy()
        grad_bias = np.sum(grad_output, keepdims=True)
        grad_input = convolve2d(grad_output, filter.cpu().numpy(), mode='full')
        #grad_input = convolve2d(grad_output, filter.numpy(), mode='full')
        # the previous line can be expressed equivalently as:
        # grad_input = correlate2d(grad_output, flip(flip(filter.numpy(), axis=0), axis=1), mode='full')
        grad_filter = correlate2d(input.cpu().numpy(), grad_output, mode='valid')
        #grad_filter = correlate2d(input.numpy(), grad_output, mode='valid')
        return torch.from_numpy(grad_input).to(device), torch.from_numpy(grad_filter).to(torch.float).to(device), torch.from_numpy(grad_bias).to(torch.float).to(device)
        #return torch.from_numpy(grad_input), torch.from_numpy(grad_filter).to(torch.float).to("cuda:0"), torch.from_numpy(grad_bias).to(torch.float)
        #return torch.from_numpy(grad_input), torch.from_numpy(grad_filter).to(torch.float).to("cpu"), torch.from_numpy(grad_bias).to(torch.float)

class ScipyConv2d(nn.Module):
    def __init__(self, filter_width, filter_height):
        super(ScipyConv2d, self).__init__()
        self.filter = Parameter(torch.randn(filter_width, filter_height))
        self.bias = Parameter(torch.randn(1, 1))

    def forward(self, input):
        return ScipyConv2dFunction.apply(input, self.filter, self.bias)
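
# Passthrough wrapper: DataParallel hides the wrapped module's own attributes
# (e.g. net.filter), so failed attribute lookups are delegated to the inner module.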
class DataParallelPassthrough(torch.nn.DataParallel):
    def __getattr__(self, name):
        try:
            return super().__getattr__(name)
        except AttributeError:
            return getattr(self.module, name)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Check to see if CUDA is there:
print("We have CUDA:")
print(torch.cuda.is_available())

net = ScipyConv2d(3, 3)
net = DataParallelPassthrough(net)
net.to(device)
print("Filter and bias: ", list(net.parameters()))
input = torch.randn(10, 10, requires_grad=True)
input = input.to(device)
output = net(input)
print("Output from the convolution: ", output)
output.backward(torch.randn(8, 8))
print("Gradient for the input map: ", input.grad)

print("The above code is a check. Now let's try something else...")
input = [torch.randn(20, 20, device=device, dtype=torch.double, requires_grad=True)]
output = net(input)
test = gradcheck(moduleConv, input, eps=1e-6, atol=1e-4)
print("Are the gradients correct: ", test)
```
with an output of:
‘’’
We have CUDA:
True
Filter and bias: [Parameter containing:
tensor([[-0.8550, -0.0050, -1.3651],
[-0.2784, 0.8346, 0.0890],
[ 1.2413, -0.7320, 0.5601]], device=‘cuda:0’, requires_grad=True), Parameter containing:
tensor([[0.3606]], device=‘cuda:0’, requires_grad=True)]
printing input, filter, bias type before detach
<class ‘torch.Tensor’> <class ‘torch.nn.parameter.Parameter’> <class ‘torch.nn.parameter.Parameter’>
printing input, filter, bias type after detach
<class ‘torch.Tensor’> <class ‘torch.nn.parameter.Parameter’> <class ‘torch.nn.parameter.Parameter’>
‘’’
followed by this error:
‘’’
TypeError Traceback (most recent call last)
in ()
71 input = torch.randn(10, 10, requires_grad=True)
72 input = input.to(device)
—> 73 output = net(input)
74 print("Output from the convolution: ", output)
75 output.backward(torch.randn(8, 8))
4 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
–> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/data_parallel.py in forward(self, *inputs, **kwargs)
151 inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
152 if len(self.device_ids) == 1:
–> 153 return self.module(*inputs[0], **kwargs[0])
154 replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
155 outputs = self.parallel_apply(replicas, inputs, kwargs)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
–> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
in forward(self, input)
50 self.bias = Parameter(torch.randn(1, 1))
51 def forward(self, input):
—> 52 return ScipyConv2dFunction.apply(input, self.filter, self.bias)
53
54 class DataParallelPassthrough(torch.nn.DataParallel):
in forward(ctx, input, filter, bias)
22 #print(type(input), type(filter), type(bias))
23 #result = correlate2d(input.cpu().numpy(), filter.cpu().numpy(), mode=‘valid’)
—> 24 result = correlate2d(input.numpy(), filter.numpy(), mode=‘valid’)
25 result += bias.cpu().numpy()
26 #result += bias.numpy()
TypeError: can’t convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
‘’’
Ok, so all I need to do is to change line 21 in my code to:
‘’’
result = correlate2d(input.cpu().numpy(), filter.cpu().numpy(), mode=‘valid’)
‘’’
but when I do this my output is:
‘’’
We have CUDA:
True
Filter and bias: [Parameter containing:
tensor([[ 0.1570, -1.3069, -0.1185],
[-0.5576, -0.7210, -1.4785],
[-0.1286, -2.2084, -1.0298]], device=‘cuda:0’, requires_grad=True), Parameter containing:
tensor([[-1.2125]], device=‘cuda:0’, requires_grad=True)]
printing input, filter, bias type before detach
<class ‘torch.Tensor’> <class ‘torch.nn.parameter.Parameter’> <class ‘torch.nn.parameter.Parameter’>
printing input, filter, bias type after detach
<class ‘torch.Tensor’> <class ‘torch.nn.parameter.Parameter’> <class ‘torch.nn.parameter.Parameter’>
Output from the convolution: tensor([[-4.3066, -1.3450, -3.9924, 1.4882, 3.4400, -1.4760, -2.2422, 0.4295],
[ 0.9262, 1.0034, 1.8979, -1.1040, 1.4920, -0.5098, -0.0920, -2.5853],
[ 1.7836, 6.2617, -5.5159, -6.1085, -0.1257, -1.2477, -0.6303, 2.0732],
[-4.4696, -1.9317, -3.7583, -6.5660, -5.2777, -8.2078, -6.2274, -0.0184],
[-7.2691, -6.8640, -7.2429, -4.8744, -1.5732, -5.5622, -8.7324, -4.4410],
[-6.9751, -8.4269, -7.8431, -4.6612, -0.3702, -1.9843, -6.8396, -0.1316],
[-3.9828, -7.2248, -8.5095, -1.0465, 2.4089, -3.6733, -5.2893, -0.7185],
[-1.9128, 0.4661, -2.7038, -1.3125, -1.4977, -2.2579, -1.6557, 0.8427]],
       grad_fn=<ScipyConv2dFunctionBackward>)
Gradient for the input map: None
The above code is a check. Now let's try something else...
printing input, filter, bias type before detach
<class 'list'> <class 'torch.nn.parameter.Parameter'> <class 'torch.nn.parameter.Parameter'>
printing input, filter, bias type after detach
<class 'list'> <class 'torch.nn.parameter.Parameter'> <class 'torch.nn.parameter.Parameter'>
/usr/local/lib/python3.6/dist-packages/torch/tensor.py:746: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the gradient for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations.
  warnings.warn("The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad "
```
followed by this error:
‘’’
AttributeError Traceback (most recent call last)
in ()
75
76 input = [torch.randn(20, 20, device=device, dtype=torch.double, requires_grad=True)]
—> 77 output = net(input)
78 test = gradcheck(moduleConv, input, eps=1e-6, atol=1e-4)
79 print("Are the gradients correct: ", test)
4 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
–> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/data_parallel.py in forward(self, *inputs, **kwargs)
151 inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
152 if len(self.device_ids) == 1:
–> 153 return self.module(*inputs[0], **kwargs[0])
154 replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
155 outputs = self.parallel_apply(replicas, inputs, kwargs)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
–> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
in forward(self, input)
47 self.bias = Parameter(torch.randn(1, 1))
48 def forward(self, input):
—> 49 return ScipyConv2dFunction.apply(input, self.filter, self.bias)
50
51 class DataParallelPassthrough(torch.nn.DataParallel):
in forward(ctx, input, filter, bias)
18 print(“printing input, filter, bias type after detach”)
19 print(type(input), type(filter), type(bias))
—> 20 result = correlate2d(input.cpu().numpy(), filter.cpu().numpy(), mode=‘valid’)
21 #result = correlate2d(input.numpy(), filter.numpy(), mode=‘valid’)
22 result += bias.cpu().numpy()
AttributeError: ‘list’ object has no attribute ‘cpu’
‘’’
So now I'm stuck: if I insert .cpu() I get the 'list' AttributeError above, and if I remove .cpu() I'm back to the earlier TypeError about converting a CUDA tensor to NumPy.
What’s the proper way to transfer/move between GPU usage and CPU usage?
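For reference, here is the kind of round-trip pattern I think might be needed — a sketch only, assuming the right approach is to detach and copy everything to the CPU for the SciPy calls and then move the results back to whatever device the caller's tensors live on (please correct me if this is the wrong idea):
```
import numpy as np
import torch
from scipy.signal import convolve2d, correlate2d
from torch.autograd import Function

class ScipyConv2dFunction(Function):
    @staticmethod
    def forward(ctx, input, filter, bias):
        device = input.device  # remember where the caller's tensors live
        # detach and copy to host memory so NumPy/SciPy can see the data
        inp, filt, b = input.detach().cpu(), filter.detach().cpu(), bias.detach().cpu()
        result = correlate2d(inp.numpy(), filt.numpy(), mode='valid')
        result += b.numpy()
        ctx.save_for_backward(inp, filt, b)
        # move the result back to the original device before returning
        return torch.as_tensor(result, dtype=input.dtype, device=device)

    @staticmethod
    def backward(ctx, grad_output):
        inp, filt, b = ctx.saved_tensors  # CPU copies saved in forward
        device = grad_output.device
        grad_np = grad_output.detach().cpu().numpy()
        grad_bias = np.sum(grad_np, keepdims=True)
        grad_input = convolve2d(grad_np, filt.numpy(), mode='full')
        grad_filter = correlate2d(inp.numpy(), grad_np, mode='valid')
        # every returned gradient goes back to the device the grad arrived on
        return (torch.from_numpy(grad_input).to(device=device, dtype=grad_output.dtype),
                torch.from_numpy(grad_filter).to(device=device, dtype=filt.dtype),
                torch.from_numpy(grad_bias).to(device=device, dtype=b.dtype))
```
If that pattern is right, then I suspect the list error is a separate mistake on my part: gradcheck expects a tuple/list of inputs, while net(...) should be called with the tensor itself (e.g. net(input[0])), not the list. Is that the correct way to think about it?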