LSTM where input is a function of output at previous time

I am trying to use deep learning to solve a stochastic control problem. The goal is to find a control process that maximizes the state value at the terminal time. Since we are working with time-series I thought it’s best to use LSTM. However, the thing is that the input at each time step depends on the control which is the output of our net. Hence I work with LSTMCell and create a for loop in the forward function that iterates over the timesteps. Here is my code:

T = 1
sequence_len = 100
dt = T/sequence_len
sqrdt = np.sqrt(dt)
drift = 0.08
volatility = 0.3

class ControlLSTM(nn.ModuleList):
    def __init__(self, sequence_len, dimension, hidden_dim, batch_size):
        super(ControlLSTM, self).__init__()

        # init the meta parameters
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.sequence_len = sequence_len
        self.dimension = dimension

        # first layer lstm cell
        self.lstm_1 = nn.LSTMCell(input_size=dimension, hidden_size=hidden_dim)
        # fully connected layer to connect the output of the LSTM cell to the output
        self.fc = nn.Linear(in_features=hidden_dim, out_features=dimension)

    def forward(self, x, w, hc):
        # empty tensor for the output of the lstm, this is the contol
        output_seq = torch.empty((self.sequence_len,
        # init the both layer cells with the zero hidden and zero cell states
        hc_1 = hc
        # for every timestep use input x[t] to compute control out from hiden state h1 and derive the next imput x[t+1]
        for t in range(self.sequence_len):
            # get the hidden and cell states from the first layer cell
            hc_1 = self.lstm_1(x[t], hc_1)
            # unpack the hidden and the cell states from the first layer
            h_1, c_1 = hc_1
            out = self.fc(h_1)
            output_seq[t] = out
            if t < self.sequence_len - 1:
                x[t+1] += x[t] * out * drift * dt + x[t]* out * volatility *sqrdt * w[t]
        # return the output and state sequence
        return output_seq, x

    #functions that initialize hiden state, input and noise
    def init_hidden(self):
        # initialize the hidden state and the cell state to zeros
        return (torch.zeros(self.batch_size, self.hidden_dim),
                torch.zeros(self.batch_size, self.hidden_dim))

    def init_brownian(self):
        return torch.randn(self.sequence_len, self.batch_size, self.dimension)

    def init_state(self):
        return torch.ones(self.sequence_len, self.batch_size, self.dimension)

#Custom loss function motivated by the log return at terminal time
# loss of the form -E[ln(|X_T|^2)]
def loss1(input):
    l2 = torch.norm(input, dim=1)
    log = torch.log(l2)
    loss = - torch.mean(log)
    return loss

net = ControlLSTM(sequence_len=sequence_len, dimension=1, hidden_dim=512, batch_size=30)
optimizer = optim.Adam(net.parameters(), lr=0.001)

#Training loop
losses = []
controls = []

for epoch in range(10):
    hc = net.init_hidden()
    x = net.init_state()
    w = net.init_brownian()

    control, state = net(x, w, hc)

    last = state[-1,:,:]
    loss = loss1(last)


The thing is when I run the training loop I get “RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation”. Why is that? Is my whole approach wrong or do I simply have a bug somewhere?

Hi, I am running into a similar problem. I was wondering if you were able to find a solution?