Broadcasting with AutoGrad doesn't work

Repost of this Stackoverflow question

I can’t seem to get broadcasting to work with autograd in PyTorch 0.4.0! Below is a minimal code example that reproduces my problem. I would like to find a single value “bias” that minimizes the loss over the dataset. I understand the error message as saying that it wants to backpropagate a vector with 5 entries into a scalar, which it cannot figure out how to do. However, this is the whole idea of broadcasting. The behavior I expected was that it would propagate the mean of the error back to the broadcast scalar value (here, bias).

Please advise.


import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset


class AddBias(torch.autograd.Function):
    """Custom autograd function whose forward pass returns ``input - bias``.

    Despite the name, the bias is subtracted, and backward negates the
    gradient it reports for bias accordingly.
    """
    # NOTE(review): forward/backward are not declared @staticmethod although the
    # class is used through AddBias.apply — confirm against the
    # torch.autograd.Function documentation for the PyTorch version in use.
    def forward(ctx, input, bias):
        """Save both operands on ctx and return their difference."""
        ctx.save_for_backward(input, bias)
        # This subtraction broadcasts when bias has fewer dimensions than input.
        return input - bias
    def backward(ctx, grad_out):
        """Return the pair (gradient for input, gradient for bias)."""
        # The saved tensors are retrieved but not used in the rest of this method.
        input, bias = ctx.saved_tensors
        grad_in = grad_bias = None
        # len_grad is the number of entries in ctx.needs_input_grad, not a flag value.
        len_grad = len(ctx.needs_input_grad)
        assert len_grad in {0, 1, 2}
        if ctx.needs_input_grad[0]: grad_in = grad_out
        # The bias gradient is produced whenever there are two entries; the flag
        # at index 1 is not consulted.
        # NOTE(review): grad_out is passed on with its own shape, so when bias was
        # broadcast in forward this gradient has more dimensions than bias (the
        # expand() RuntimeError reported by optimizer.step()). It presumably has
        # to be reduced over the broadcast dimensions (e.g. summed) — confirm.
        if len_grad == 2: grad_bias = -1 * grad_out 
        return grad_in, grad_bias

class BiasModel(nn.Module):
    """Module with a single learnable scalar that AddBias combines with each input."""

    def __init__(self, size):
        # `size` is accepted but not used by this constructor.
        super().__init__()
        # 0-dim float parameter initialised to 0.5.
        initial_bias = torch.tensor(0.5, dtype=torch.float, requires_grad=True)
        self.bias = nn.Parameter(initial_bias)
        self.bias_model = AddBias.apply

    def forward(self, arr):
        """Run AddBias on ``arr`` and the bias, then append a trailing axis of length 1."""
        shifted = self.bias_model(arr[:], self.bias)
        return shifted.unsqueeze(-1)

class MyData(Dataset):
    """Dataset wrapping an indexable collection of numeric rows.

    Each item is a pair ``(arr, target)``: ``arr`` is row ``i`` as a float
    tensor and ``target`` marks, per element, whether the value exceeds 0.5.
    """

    def __init__(self, data):
        # Keep the rows on the instance so item access does not depend on a
        # module-level variable.
        self.data = data

    def __len__(self):
        """Return the number of rows."""
        return len(self.data)

    def __getitem__(self, i):
        """Return ``(arr, target)`` for row ``i``; target has a trailing axis of length 1."""
        arr = torch.tensor(self.data[i], dtype=torch.float)
        # 1.0 where the element is above 0.5, else 0.0; converted with .float()
        # rather than re-wrapping an existing tensor in torch.tensor().
        target = (arr > 0.5).float().unsqueeze(-1)
        return arr, target

m = 5
data = np.random.random((100, m))
model = BiasModel(m)
my_data = MyData(data)

loss_func = F.binary_cross_entropy_with_logits

# Baseline: average per-sample loss before training, with autograd disabled.
with torch.no_grad():
    loss = 0.
    for arr, target in my_data:
        loss += loss_func(model(arr), target)
    print('loss before', loss / len(my_data))

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# One pass of per-sample SGD over the dataset.
loss_tot = 0.
for arr, target in my_data:
    # Clear gradients left over from the previous sample so they do not accumulate.
    optimizer.zero_grad()
    loss = loss_func(model(arr), target)
    loss_tot += loss
    # The traceback shows these two calls following `loss_tot += loss`;
    # optimizer.step() is where the reported RuntimeError is raised.
    loss.backward()
    optimizer.step()


loss before tensor(0.5735)
RuntimeError                              Traceback (most recent call last)
<ipython-input-4-27bce65b553b> in <module>()
     56     loss_tot += loss
     57     loss.backward()
---> 58     optimizer.step()

~/miniconda3/envs/myproject/lib/python3.6/site-packages/torch/optim/sgd.py in step(self, closure)
    105                         d_p = buf
--> 107                 p.data.add_(-group['lr'], d_p)
    109         return loss

RuntimeError: expand(torch.FloatTensor{[5]}, size=[]): the number of sizes provided (0) must be greater or equal to the number of dimensions in the tensor (1)

This line in your forward input - bias broadcasts, but in backward you didn’t revert the broadcasting.

I think you’d be better off just using the provided subtraction rather than writing a new autograd.Function… It will be faster and won’t be error-prone.

Thanks, Siman!

That makes a lot of sense now.

Resolved it by changing a line in the backward pass in AddBias to
if len_grad == 2: grad_bias = -1 * torch.mean(grad_out)

The example above was only meant to illustrate my issue with broadcasting during the learning phase. Thanks for the suggestion though.