Hi! I have a very simple example `Module`

below, where I have a circular buffer, and attempt to learn a `_delay`

parameter (i.e. distance between a read and write pointer) that transforms an input sin wave into a phase-shifted and zero-padded output. The goal is to emulate a delay line (as in DSP), learning an unknown delay parameter given some arbitrary input and reference output.

Currently this gives errors that the `element 0 of tensors does not require grad and does not have a grad_fn`

- maybe, the problem (as represented) isnâ€™t differentiable?

Any advice as to why this doesnâ€™t work would be appreciate, and practical suggestions for representing this problem in PyTorch! Thanks

Error:

```
Traceback (most recent call last):
File "delay_eg.py", line 59, in <module>
loss.backward()
File "venv391/lib/python3.9/site-packages/torch/_tensor.py", line 307, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "venv391/lib/python3.9/site-packages/torch/autograd/__init__.py", line 154, in backward
Variable._execution_engine.run_backward(
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
```

Re-producible code (note that the read pointer is fractional!):

```
import numpy as np
import torch
class Delay(torch.nn.Module):
def __init__(self, N=44100, offset=1):
super().__init__()
self.N = N
self.register_buffer('circ_buffer', torch.zeros(N, 1, dtype=torch.float))
self._delay = torch.nn.Parameter(torch.FloatTensor([offset]))
self.reset_state()
def reset_state(self):
self.circ_buffer[self.circ_buffer > 0.0] = 0.0
self.circ_buffer[self.circ_buffer < 0.0] = 0.0
self._read_ptr = 0.0
self._write_ptr = int(self._delay.item())
def forward(self, x):
y = torch.zeros(*x.shape, dtype=torch.float).to(x.device)
for i in range(x.shape[0]):
x_i = x[i].item()
# Write
self.circ_buffer[self._write_ptr] = x_i
# Read (with linear interpolation)
i0 = np.floor(self._read_ptr).astype(int)
i1 = (i0 + 1) % self.N
frac = self._read_ptr - np.floor(self._read_ptr)
y[i] = self.circ_buffer[i0] + frac*(self.circ_buffer[i1]-self.circ_buffer[i0])
# Pointers
self._write_ptr += 1
if self._write_ptr == self.N:
self._write_ptr = 0
self._read_ptr += 1
if self._read_ptr >= self.N:
self._read_ptr -= self.N
return y
if __name__ == "__main__":
device = torch.device("cpu")
# Simple sin wave
ts = np.arange(0.0, 0.3, 1/44100)
xs = np.sin(2*np.pi*5*ts + 0)
# Phase shift, with zero padding
delay = int(xs.shape[0]/1.31)
ys = np.concatenate([np.zeros(xs.shape[0]-delay), xs[:delay]])
model = Delay().to(device)
learning_rate = 1e-3
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
device = next(model.parameters()).device
loss_fn = torch.nn.MSELoss()
for _ in range(100):
xst = torch.tensor(xs).float().to(device)
yst = torch.tensor(ys).float().to(device)
model.reset_state()
optimizer.zero_grad()
loss = loss_fn(model(xst), yst)
loss.backward()
optimizer.step()
print(loss.item())
```