# Variables needed for gradient computation has been modified by an inplace operation: [torch.DoubleTensor [1]]

Hi, I have experience with PyTorch, but it’s the first time I’m generating values of stochastic variables by recursion, and including initial values in the optimizer. The procedure roughly follows the paper [1709.05963] Machine learning approximation algorithms for high-dimensional fully nonlinear partial differential equations and second-order backward stochastic differential equations, with some differences.

``````import torch
import numpy as np
import scipy.integrate as integrate
import matplotlib.pyplot as plt

name = 'Merton' # 'Merton', 'Allen-Cahn', 'Black-Scholes', 'HJB'

# Define variables
T = 1
timeDim = 2
spaceDim = 1
d = max(5, spaceDim)
if name == 'Merton':
mu = 0.3
r = 0.05
sigma = 0.4
eta = 1.0
coeff = (mu-r)/sigma # a scalar term that arises frequently in the code

n = 20
h = T/n
sqrt_h = float(np.sqrt(h))
n_maxstep = 10000
batchSize = 4
lrParam = 0.002
torch.set_default_dtype(torch.float64)
torch.manual_seed(1234)

class NeuralNetwork(torch.nn.Module):
def __init__(self, structure):
super().__init__()
self.network = []
for inFeature, outFeature in zip(structure, structure[1:]):
self.network.extend([
torch.nn.Linear(inFeature, outFeature),
torch.nn.ReLU(),
])
self.network.pop()
self.network = torch.nn.Sequential(*self.network)

def forward(self, x):
return self.network(x)

# neural network architectures
n_neuronForM = [timeDim + spaceDim, d, d, spaceDim**2] # U_yy
n_neuronForN = [timeDim + spaceDim, d, d, spaceDim] # U_sy + 1/2 U_yyy

n_neuronForU = [timeDim + spaceDim, d, d, 1]

modelM = NeuralNetwork(n_neuronForM)
modelN = NeuralNetwork(n_neuronForN)
modelU = NeuralNetwork(n_neuronForU)

if torch.cuda.is_available():
device = "cuda"
elif torch.backends.mps.is_available():
device = "mps"
else:
device = "cpu"

# Initial/Terminal Condition
def g(t, y): # y has [batchSize, n+1], but t is just i*h
if name == 'Merton':
return -torch.div(torch.exp(-eta*y),(1+r*(T-t)))

def w(t,s): # This is ok to be scalar function
if name == 'Merton':
return 1/(1+r*(s-t))

# Form of PDE
#     t, s, y, Uts, Uss, U_yts, U_yts, U_yyts, U_yyss
def F(t, s, X, Y1, Y2, Z1, Z2, M1, M2, name): # This needs to be a torch tensor function, except in t and s
'''batch = X.size(dim=0)
trajLength = X.size(dim=1)
i = int(t/h)
j = int(s/h)'''
if name == 'Merton':
y = 0.5*coeff**2*torch.mul(torch.square(torch.div(Z2,M2)),M1)-coeff**2*torch.mul(torch.div(Z2,M2),Z1)-w(t,s)*Y1-0.5*M1
return y

# Comparison function, if we want one
def u(t,s,y,name):
if name == 'Merton':
f = lambda t, tau: 0.5*coeff/sigma+w(t,tau)

#initial value for X0, initial guess for Y(0,0,X0) and Z(0,0,X0)
X0 = torch.ones(1)
Y0 = -torch.rand(1)
Z0 = torch.rand(1)

# All neural network input dimensions are [batchSize, timeDim + spaceDim]. The array of [t, s, y] goes in each row.
# The time and space substitution will be done by t=i*h, s=j*h, y=X(s)
# All neural network output dimensions are [batchSize, outputdim], output dim is either 1, spaceDim or spaceDim^2
def generateBSVIE(Y0, Z0):
# Computing Yii and Zii
dW = torch.normal(mean = 0.0, std = sqrt_h, size = [batchSize, n]) # [B, n]
X = torch.ones((batchSize, n+1))*X0 #[B, n+1], representing X_0, X_1, ..., X_n
for i in range(n):
X[:,i+1] = X[:,i] + dW[:,i]
Yii = torch.ones((batchSize, n+1, 1))*Y0
Zii = torch.ones((batchSize, n+1, spaceDim))*Z0
terminalValue = torch.zeros((batchSize, n+1))
for b in range(batchSize):
for i in range(n-1):
tt = torch.cat((torch.ones((batchSize, 2))*i*h, X[:, i].reshape([batchSize,1])), 1)
if spaceDim == 1:
Mii = modelM(tt).reshape([batchSize, 1]) # T, spaceDim^2 as output
Nii = modelN(tt).reshape([batchSize, 1]) # A, spaceDim as output
else:
Mii = modelM(tt) # T, spaceDim^2 as output
Nii = modelN(tt) # A, spaceDim as output
Yii[b,i+1] = Yii[b,i] + F(i*h, i*h, X[b,i], Yii[b,i,0], Yii[b,i,0], Zii[b,i,:], Zii[b,i,:], Mii[b,0], Mii[b,0], name)*h + dW[b,i]*Zii[b,i,:]
Zii[b,i+1] = Zii[b,i] + Nii[b,:]*h + Mii[b,:]*dW[b,i]
terminalValue[b,i] = g(i*h, X[b,-1])

# Computing Yij and Zij
for b in range(batchSize):
YArray = torch.zeros((batchSize,n+1,n+1,1))
ZArray = torch.zeros((batchSize,n+1,n+1,spaceDim))
UArray = torch.zeros((batchSize,n+1,n+1,1))
for i in range(n):
YArray[b,i,i,0], ZArray[b,i,i,:] = Yii[b,i,0], Zii[b,i,:]
for j in range(i,n):
ts = torch.cat((torch.ones((batchSize,1))*i*h, torch.ones((batchSize,1))*j*h, X[:,j].reshape([batchSize,1])), 1)
ss = torch.cat((torch.ones((batchSize,2))*j*h, X[:,j].reshape([batchSize,1])), 1)
if spaceDim == 1:
Mij = modelM(ts).reshape([batchSize, 1])
Mjj = modelM(ss).reshape([batchSize, 1])
Nij = modelN(ts).reshape([batchSize, 1])
Njj = modelN(ss).reshape([batchSize, 1])
UArray[b,i,j,0] = modelU(ts)[b,0]
else:
Mij = modelM(ts)
Mjj = modelM(ss)
Nij = modelN(ts)
Njj = modelN(ss)
UArray[b,i,j,0] = modelU(ts)[b,0]
YArray[b,i,j+1,0] = YArray[b,i,j,0] - F(i*h, j*h, X[b,j], YArray[b,i,j,0], YArray[b,j,j,0], ZArray[b,i,j,:], ZArray[b,j,j,:], Mij[b,:], Mjj[b,:], name)*h+ZArray[b,i,j,:]*dW[b,j] # [B, 1]
ZArray[b,i,j+1,:] = ZArray[b,i,j,:] + Nij[b,:]*h + Mij[b,:]*dW[b,j] # [B, spaceDim]

return YArray, ZArray, UArray, terminalValue

YZLearningHistory = []
optimizer = torch.optim.Adam([Y0, Z0] + list(modelM.parameters()) + list(modelN.parameters()), lr = lrParam)

def computeLoss(Y0, Z0, mode):
# loss computation
if mode == 'XY':
Y, Z, U, terminalValue = generateBSVIE(Y0, Z0)
loss = torch.mean(torch.square(Y[:,:,n,0].reshape([batchSize, n+1])-terminalValue))
elif mode == 'U':
modelU.eval()
Y, Z, U, terminalValue = generateBSVIE(Y0, Z0)
loss = torch.mean(torch.square(U-Y))
return loss

for step in range(n_maxstep):
currentLR = optimizer.param_groups[-1]["lr"]
lossY = computeLoss(Y0,Z0, mode='XY')
YZLearningHistory.append([step, lossY.item(), Y0.item(), currentLR])
#print some things
print(lossY, float(Y0))
lossY.backward(retain_graph=True)
optimizer.step()
``````

I tried with and without “retain_graph=True” in lossY.backward(), the error message from that line comes out to be:

“RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.DoubleTensor [1]], which is output 0 of AsStridedBackward0, is at version 230; expected version 229 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!”

I suspect it could be the code for F, or the recursion process, that causes either Y0, Z0, or the parameters in modelM or modelN, but I am not certain. What might be the cause of the error?

you are allowed to do this:

``````import torch
x = torch.randn(10,10)
y = torch.randn(10,10)
l = torch.sum((x-y)**2)
print(l.item())
l.backward()
``````

this as well :

``````import torch
re = torch.randn(5,5)

x = torch.randn(10,10)
y = torch.randn(10,10)
s = (x-y)
s[:5,:5] = 0
l = torch.sum(s**2)
print(l.item())

l.backward()
``````

but not this:

``````import torch
re = torch.randn(5,5)

x = torch.randn(10,10)
y = torch.randn(10,10)
s = (x-y)
l1 = torch.sum(s**2)
s[:5,:5] = 0
l2 = torch.sum(s**2)
l = l1 +l2
print(l.item())
l.backward()
``````
``````RuntimeError: one of the variables needed for gradient computation has been modified by an
inplace operation: [torch.FloatTensor [10, 10]], which is output 0 of torch::autograd::CopySlices,
is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation
In the last snippet, we compute the loss in two step. However, modifying `s` in-place can cause issues during backpropagation.