For a required Tensor, RuntimeError: One of the differentiated Tensors does not require grad

I’m getting the following error message: "For a required Tensor, RuntimeError: One of the differentiated Tensors does not require grad". I don’t think I should add requires_grad = False for this or the other tensors. I think there might be an error elsewhere that I haven’t noticed. Any suggestions? Or am I wrong?

The error arises from “u_t = torch.autograd.grad(u.sum(), t, create_graph=True)[0]”, which is towards the end of the displayed code. Below is a snippet of my code:

> class PhysicsInformedNN:
>         # Fully connected network u(x, t) plus a 16x1 coefficient vector
>         # (lambda1) that weights a library of candidate terms built in net_f.
>         # Predictions, losses and optimizers are all constructed once, inside
>         # __init__.
>         # Initialize the class
>         def __init__(self, X, u, X_f, X_val, u_val, layers, lb, ub):
>             
>             self.lb = lb
>             self.ub = ub
>             self.layers = layers
> 
>             # Initialize NNs
>             self.weights, self.biases = self.initialize_NN(layers)
> 
>             # Initialize parameters
>             # One coefficient per column of Phi (16 library terms, see net_f).
>             self.lambda1 = torch.zeros((16, 1), dtype=torch.float32, requires_grad=True)
> 
>             # Specify the list of trainable variables 
>             var_list_1 = self.biases + self.weights
>             var_list_Pretrain = self.biases + self.weights + [self.lambda1]
>             #var_list_Pretrain.append(self.lambda1)
> 
>             ######### Training data ################
>             #either keep placeholders like the author or work directly with tensors
>             # Column 0 of X / X_f is taken as x, column 1 as t.
>             self.x = X[:, 0:1]
>             self.t = X[:, 1:2]
>             self.u = u
>             self.x_f = X_f[:, 0:1]
>             self.t_f = X_f[:, 1:2]
> 
>             # "Placeholder" tensors: torch.empty only allocates storage of the
>             # given shape. Nothing in this class copies self.x / self.t / ...
>             # into them, and none of them is created with requires_grad=True.
>             self.x_pt = torch.empty((self.x.shape[0], self.x.shape[1]), dtype=torch.float32)
>             self.t_pt = torch.empty((self.t.shape[0], self.t.shape[1]), dtype=torch.float32)
>             self.u_pt = torch.empty((self.u.shape[0], self.u.shape[1]), dtype=torch.float32)
>             self.x_f_pt = torch.empty((self.x_f.shape[0], self.x_f.shape[1]), dtype=torch.float32)
>             self.t_f_pt = torch.empty((self.t_f.shape[0], self.t_f.shape[1]), dtype=torch.float32)
> 
>             # The forward passes run immediately, on the placeholder tensors.
>             # NOTE(review): net_f differentiates u with respect to the x and t
>             # arguments it receives; x_f_pt / t_f_pt above do not require grad.
>             self.u_pred = self.net_u(self.x_pt, self.t_pt)
>             self.f_pred, self.Phi_pred, self.u_t_pred = self.net_f(self.x_f_pt, self.t_f_pt, self.x_f.shape[0])
> 
>             # Data-fit term (MSE between u_pt and the prediction).
>             self.loss_u = torch.mean((self.u_pt - self.u_pred)**2)
>             # Weight on the residual term; allocated with torch.empty and not
>             # assigned a value anywhere in this class.
>             self.loss_f_coeff_pt = torch.empty(1, dtype=torch.float32)
>             self.loss_f = self.loss_f_coeff_pt * torch.mean(self.f_pred**2)
> 
>             # L1 penalty on the library coefficients, scaled by 1e-7.
>             self.loss_lambda = 1e-7 * torch.norm(self.lambda1, p=1)
>             self.loss = torch.log(self.loss_u + self.loss_f + self.loss_lambda)  # log loss
> 
>             ######### Validation data ################
>             self.x_val = X_val[:, 0:1]
>             self.t_val = X_val[:, 1:2]
>             self.u_val = u_val
> 
>             # Create tensors for placeholder-like behavior (not a common practice in PyTorch)
>             self.x_val_pt = torch.empty((self.x_val.shape[0], self.x_val.shape[1]), dtype=torch.float32)
>             self.t_val_pt = torch.empty((self.t_val.shape[0], self.t_val.shape[1]), dtype=torch.float32)
>             self.u_val_pt = torch.empty((self.u_val.shape[0], self.u_val.shape[1]), dtype=torch.float32)
> 
> 
>             # Same pattern as the training branch: net_f is called on tensors
>             # that were created without requires_grad=True.
>             self.u_val_pred = self.net_u(self.x_val_pt, self.t_val_pt)
>             self.f_val_pred, _, _ = self.net_f(self.x_val_pt, self.t_val_pt, self.x_val.shape[0])
> 
>             # Validation loss has no coefficient on the residual term and no
>             # lambda1 penalty.
>             self.loss_u_val = torch.mean((self.u_val_pt - self.u_val_pred)**2)
>             self.loss_f_val = torch.mean(self.f_val_pred**2)
>             self.loss_val = torch.log(self.loss_u_val + self.loss_f_val)  # log loss
> 
>             ######### Optimizer #########################
>             # The second assignment replaces the first, so only the
>             # var_list_Pretrain optimizer is kept on self.optimizer.
>             # NOTE(review): passing the loss positionally and using var_list= /
>             # ftol= looks carried over from the TensorFlow original - confirm
>             # against the torch.optim.LBFGS signature.
>             self.optimizer = optim.LBFGS(self.loss,var_list=var_list_1, max_iter=1000,max_eval=1000,history_size=50,line_search_fn='strong_wolfe',ftol=1.0*torch.finfo(torch.float32).eps)
>             self.optimizer = optim.LBFGS(self.loss,var_list=var_list_Pretrain, max_iter=1000,max_eval=1000,history_size=50,line_search_fn='strong_wolfe',ftol=1.0*torch.finfo(torch.float32).eps)
> 
>            
>             self.global_step = torch.tensor(0, dtype=torch.long, requires_grad=False)
>             starter_learning_rate = 1e-3
>             # Evaluated once, here, from the current value of global_step; the
>             # result is a tensor expression, not a scheduler object.
>             self.learning_rate = starter_learning_rate * (0.75 ** (self.global_step // 1000))
>             
>             # NOTE(review): var_list= is not a keyword seen elsewhere in
>             # PyTorch optimizers - confirm against torch.optim.Adam.
>             self.optimizer_Adam = optim.Adam(var_list = var_list_1, lr=starter_learning_rate, betas=(0.99, 0.9), eps=1e-8)
> 
>             # Clears the Adam gradients, backpropagates the loss built above
>             # and returns it. The loss is not recomputed inside the closure.
>             def closure():
>                 self.optimizer_Adam.zero_grad()
>                 self.loss.backward()
>                 return self.loss
> 
>             # Stores the function itself; the closure is not called here.
>             self.train_op_Adam = closure
>             self.global_step += 1 #needed?
>             # NOTE(review): self.learning_rate was assigned a tensor expression
>             # above - confirm that calling .step() on it is intended.
>             self.learning_rate.step() #needed?
>             
>             # Maps each placeholder tensor to the data it stands for. The
>             # mapping is not read anywhere else in this class as shown.
>             self.torch_dict = {self.x_pt: self.x, self.t_pt: self.t, self.u_pt: self.u, 
>                        self.x_f_pt: self.x_f, self.t_f_pt: self.t_f,
>                        self.x_val_pt: self.x_val, self.t_val_pt: self.t_val, self.u_val_pt: self.u_val}
> 
>         # Builds one weight matrix and one bias row per pair of consecutive
>         # entries in `layers`; returns (weights, biases) as two lists.
>         def initialize_NN(self, layers):
>             weights = []
>             biases = []
>             num_layers = len(layers)
>             for l in range(0, num_layers - 1):
>                 W = self.xavier_init(size=[layers[l], layers[l+1]])
>                 # Biases are float64 and require grad; lambda1 and the
>                 # placeholder tensors in __init__ are float32.
>                 b = torch.zeros([1, layers[l+1]], dtype=torch.float64, requires_grad=True)
>                 weights.append(W)
>                 biases.append(b)
>             return weights, biases
> 
>         # Returns an (in_dim, out_dim) float64 tensor of normal samples scaled
>         # by sqrt(2 / (in_dim + out_dim)).
>         def xavier_init(self, size):
>             in_dim = size[0]
>             out_dim = size[1]
>             xavier_stddev = np.sqrt(2 / (in_dim + out_dim))
>             # NOTE(review): no requires_grad=True is passed here, unlike the
>             # biases in initialize_NN - confirm the weights are meant to train.
>             return torch.randn([in_dim, out_dim], dtype=torch.float64) * xavier_stddev
> 
>         # Forward pass: rescale X using lb/ub, apply tanh layers for all but
>         # the last weight/bias pair, then a final layer without activation.
>         def neural_net(self, X, weights, biases):
>             num_layers = len(weights) + 1
> 
>             # Affine map sending lb to -1 and ub to +1.
>             H = 2.0 * (X - self.lb) / (self.ub - self.lb) - 1.0
>             for l in range(0, num_layers - 2):
>                 W = weights[l]
>                 b = biases[l]
>                 H = torch.tanh(torch.add(torch.matmul(H, W), b))
>             W = weights[-1]
>             b = biases[-1]
>             Y = torch.add(torch.matmul(H, W), b)
>             return Y
> 
>         # Evaluates the network on x and t concatenated along dim 1.
>         def net_u(self, x, t):
>             u = self.neural_net(torch.cat([x, t], 1), self.weights, self.biases)
>             return u
> 
>         # Computes u and its derivatives with respect to the given x and t,
>         # assembles the 16-column library Phi, and returns
>         # (Phi @ lambda1 - u_t, Phi, u_t). N_f sets the row count of the
>         # constant ones column.
>         def net_f(self, x, t, N_f):
>             u = self.net_u(x, t)
>             # Each call differentiates with respect to the tensor passed in as
>             # t / x; create_graph=True keeps the result differentiable so the
>             # higher-order derivatives below can be taken.
>             u_t = torch.autograd.grad(u.sum(), t, create_graph=True)[0] #error starts here
>             u_x = torch.autograd.grad(u.sum(), x, create_graph=True)[0]
>             u_xx = torch.autograd.grad(u_x.sum(), x, create_graph=True)[0]
>             u_xxx = torch.autograd.grad(u_xx.sum(), x, create_graph=True)[0]
> 
>             # Columns: 1, then {1, u, u^2, u^3} times each of u_x, u_xx, u_xxx,
>             # preceded by u, u^2, u^3 on their own (16 columns in total).
>             Phi = torch.cat([torch.ones(N_f, 1, dtype=torch.float32),
>                             u, u**2, u**3, u_x, u*u_x, u**2*u_x,
>                             u**3*u_x, u_xx, u*u_xx, u**2*u_xx,
>                             u**3*u_xx, u_xxx, u*u_xxx, u**2*u_xxx,
>                             u**3*u_xxx], 1)
> 
>             # Human-readable names for the columns of Phi, in the same order.
>             # Overwritten on every call.
>             self.library_description = ['1',
>                                         'u', 'u**2', 'u**3',
>                                         'u_x', 'u*u_x', 'u**2*u_x', 'u**3*u_x',
>                                         'u_xx', 'u*u_xx', 'u**2*u_xx', 'u**3*u_xx',
>                                         'u_xxx', 'u*u_xxx', 'u**2*u_xxx', 'u**3*u_xxx']
> 
>             # Residual: library terms weighted by lambda1, minus u_t.
>             f = torch.matmul(Phi, self.lambda1) - u_t
>             return f, Phi, u_t

Thank you in advance

u_t expects t to be the input tensor that u was computed from; however, self.net_f is called with:

self.f_pred, self.Phi_pred, self.u_t_pred = self.net_f(self.x_f_pt, self.t_f_pt, self.x_f.shape[0])

where self.t_f_pt was initialized as:

self.t_f_pt = torch.empty((self.t_f.shape[0], self.t_f.shape[1]), dtype=torch.float32)

two lines before that call and this wasn’t used to create self.x_f_pt (which is also just an empty tensor). Let me know if I’m missing something.

I’m new to PyTorch and am converting a TensorFlow code to PyTorch.

The t input for u_t comes from self.t = X[:, 1:2]. Are you saying self.net_f needs to be called with other variables?
I’m sorry, I can’t seem to pinpoint the error from your comment.

I might have misunderstood your model, so could you post a properly formatted minimal and executable code snippet reproducing the error, please?