Hello,
During my experiments I came across the following issue.
I have a custom convolution class based on the torch.nn.Conv2d implementation:
import torch


class FastConv(object):

    @staticmethod
    def forward(x, w, b, conv_param):
        # print(x.shape, w.shape, b.shape, conv_param)
        N, C, H, W = x.shape
        F, _, HH, WW = w.shape
        stride, pad = conv_param['stride'], conv_param['pad']
        # Build a throwaway Conv2d and substitute the explicitly passed weights and bias.
        layer = torch.nn.Conv2d(C, F, (HH, WW), stride=stride, padding=pad)
        layer.weight = torch.nn.Parameter(w)
        layer.bias = torch.nn.Parameter(b)
        # Detach the input so the autograd graph starts here; gradients are read back in backward().
        tx = x.detach()
        tx.requires_grad = True
        out = layer(tx)
        cache = (x, w, b, conv_param, tx, out, layer)
        return out, cache

    @staticmethod
    def backward(dout, cache):
        try:
            x, _, _, _, tx, out, layer = cache
            # Backpropagate the upstream gradient through the graph built in forward().
            out.backward(dout)
            dx = tx.grad.detach()
            dw = layer.weight.grad.detach()
            db = layer.bias.grad.detach()
            layer.weight.grad = layer.bias.grad = None
        except RuntimeError:
            dx, dw, db = torch.zeros_like(tx), torch.zeros_like(layer.weight), torch.zeros_like(layer.bias)
        return dx, dw, db
The purpose of this FastConv class is to allow weights and biases to be passed explicitly on each forward call and to store the intermediate tensors needed for a custom backward pass (for pedagogical purposes).
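For reference, a minimal usage sketch of how I drive it (the shapes here are illustrative, not the ones from the failing setup):

out, cache = FastConv.forward(
    torch.randn(2, 3, 8, 8),            # x
    torch.randn(4, 3, 3, 3),            # w
    torch.zeros(4),                     # b
    {'stride': 1, 'pad': 1},
)
dout = torch.randn_like(out)            # upstream gradient
dx, dw, db = FastConv.backward(dout, cache)
print(dx.shape, dw.shape, db.shape)     # (2, 3, 8, 8), (4, 3, 3, 3), (4,)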
Alongside it, there is code that performs batch normalization in two ways: one applies BatchNorm2d directly, and the other simulates it by reshaping the input tensor and using BatchNorm1d.
device = 'cuda'
num_inputs = 2
input_dims = (3, 16, 16)
next_filt = 16
batchnorm = True
dtype = torch.float32
kernel_size = 3
bn_param = {'mode': 'train'}
# stride and padding preserve output spatial size
conv_param = {'stride': 1, 'pad': (kernel_size - 1) // 2}

x = torch.randn(num_inputs, *input_dims, dtype=dtype, device=device)
gamma = torch.ones(input_dims[0], device=device, dtype=dtype)
beta = torch.zeros(input_dims[0], device=device, dtype=dtype)
Weight = torch.randn(next_filt, input_dims[0], kernel_size, kernel_size, dtype=dtype, device=device)
b = torch.zeros(next_filt, dtype=dtype, device=device)
N, C, H, W = x.shape

## PyTorch BN2d
try:
    out = torch.nn.BatchNorm2d(C, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=device, dtype=dtype).forward(x)
    out_bn_2d, _ = FastConv.forward(out, Weight, b, conv_param)
except Exception as e:
    print(e)

## PyTorch BN1d
try:
    # (N, C, H, W) -> (N*H*W, C) so BatchNorm1d normalizes over the channel dimension
    ch_view = x.transpose(1, 2).transpose(2, 3).reshape(N * H * W, C)
    out = torch.nn.BatchNorm1d(C, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=device, dtype=dtype).forward(ch_view)
    out = out.reshape(N, H, W, C).transpose(2, 3).transpose(1, 2)
    out_bn_1d, _ = FastConv.forward(out, Weight, b, conv_param)
except Exception as e:
    print(e)
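When both branches succeed (e.g. with dtype=torch.float64), I expect the two variants to agree numerically, since both BN layers start from the default affine parameters (weight = 1, bias = 0):

print(torch.allclose(out_bn_2d, out_bn_1d, atol=1e-5))  # expected: True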
The thing is, with dtype=torch.float64 everything in this setup works fine, but with dtype=torch.float32 only the BatchNorm2d variant works, while the BatchNorm1d one fails with:
set_sizes_and_strides is not allowed on a Tensor created from .data or .detach().
If your intent is to change the metadata of a Tensor (such as sizes / strides / storage / storage_offset)
without autograd tracking the change, remove the .data / .detach() call and wrap the change in a `with torch.no_grad():` block.
For example, change:
x.data.set_(y)
to:
with torch.no_grad():
    x.set_(y)
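If it helps narrow things down: the message points at the .detach() call inside FastConv.forward. I assume (but have not verified) that handing the layer a contiguous copy of the strided BN1d output, or cloning after detaching, would sidestep it:

# Unverified assumption: make the BN1d output contiguous before FastConv sees it.
out_bn_1d, _ = FastConv.forward(out.contiguous(), Weight, b, conv_param)

# Alternative (also unverified): inside FastConv.forward, clone after detaching:
#     tx = x.detach().clone()
#     tx.requires_grad = True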
Reproduction code is in Colab; change the dtype in the third cell to see the difference.
[UPD] The issue does not occur with dtype==torch.float64 on CUDA, and it also does not occur with float32 on CPU.
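For completeness, here is a small sketch that walks the device/dtype combinations described above (run_bn1d_path is just a helper I wrap the BN1d branch into, not part of the original code):

import itertools

def run_bn1d_path(device, dtype):
    N, C, H, W = 2, 3, 16, 16
    x = torch.randn(N, C, H, W, device=device, dtype=dtype)
    w = torch.randn(16, C, 3, 3, device=device, dtype=dtype)
    b = torch.zeros(16, device=device, dtype=dtype)
    ch_view = x.transpose(1, 2).transpose(2, 3).reshape(N * H * W, C)
    out = torch.nn.BatchNorm1d(C, device=device, dtype=dtype)(ch_view)
    out = out.reshape(N, H, W, C).transpose(2, 3).transpose(1, 2)
    return FastConv.forward(out, w, b, {'stride': 1, 'pad': 1})

for device, dtype in itertools.product(['cpu', 'cuda'], [torch.float32, torch.float64]):
    try:
        run_bn1d_path(device, dtype)
        print(device, dtype, 'ok')
    except Exception as e:
        print(device, dtype, 'fails:', str(e).splitlines()[0])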