RuntimeError when taking the gradient of a network with respect to its input

Hello!

I’m a first-time PyTorch user and I’ve been trying to implement a simple physics-informed NN to learn the solution of Newton’s law of cooling:

dT / dt = - (T(t) - T_environment) / tau

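For reference, the closed-form solution of this ODE is T(t) = T_env + (T_0 - T_env) * exp(-t / tau), so that is the curve the network should reproduce. A quick sketch of how such a reference curve can be generated (the helper name analytic_T is just for illustration, and tau = 1.0 matches what my loss term below implicitly assumes):

import torch as pt

def analytic_T(t, T_0=273.0, T_env=373.0, tau=1.0):
    # Closed-form solution of dT/dt = -(T - T_env) / tau with T(0) = T_0
    return T_env + (T_0 - T_env) * pt.exp(-t / tau)

t = pt.linspace(0.0, 3.0, 101, dtype=pt.float64)
T_ref = analytic_T(t)  # reference temperatures to compare the NN output against
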
Sorry for the typesetting; I couldn’t get LaTeX to work here. The cost function of my neural network is basically the residual of the above equation, squared. So, as part of evaluating the cost, I need to take the gradient of T(t) (the scalar output of the NN) with respect to t (the scalar input of the NN). Below is the relevant part of my implementation:

import torch as pt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.autograd import grad

pt.set_grad_enabled(True)


class CustomDataset(Dataset):
    def __init__(self, n_data_points, n_zeros):
        super().__init__()
        
        T_end = 3.0
        zero_points = pt.zeros(n_zeros, dtype=pt.float64)
        time_points = pt.linspace(0.0, T_end, n_data_points, dtype=pt.float64)
        self.data = pt.cat((zero_points, time_points))

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx]
    
# Learning Parameters
n_epochs = 10
batch_size_train = 64
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
log_interval = 10
n_zeros = 1000
n_non_zeros = 1001

# Model Parameters
T_0 = 273.0
T_environment = 373.0

train_loader = DataLoader(CustomDataset(n_non_zeros, n_zeros), batch_size=batch_size_train, shuffle=True)
test_loader = DataLoader(CustomDataset(n_non_zeros, n_zeros), batch_size=batch_size_test, shuffle=True)

# Create the network structure. The input and the output are both single
# values; the network has five fully connected layers with five nodes
# in each hidden layer.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(1, 5)
        self.fc2 = nn.Linear(5, 5)
        self.fc3 = nn.Linear(5, 5)
        self.fc4 = nn.Linear(5, 5)
        self.fc5 = nn.Linear(5, 1)

        self.double()

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x 
    
class PINN_Loss(nn.Module):
    def __init__(self, neural_net, T_0 , T_environment):
        super(PINN_Loss, self).__init__()
        self.neural_net = neural_net
        self.T0 = T_0
        self.T_env = T_environment

    def forward(self, inputs, targets=None):
        error = 0.0
        n_terms = 0
        for n in range(len(inputs)):
            x = pt.unsqueeze(inputs[n], 0)
            print('x', x)
            funcApprox = self.neural_net(x)
            print('funcApprox', funcApprox)
            grad_func = grad(funcApprox, x, grad_outputs=pt.ones_like(funcApprox), retain_graph=True, create_graph=False)
            error += (grad_func[0] + (funcApprox - self.T_env))**2  # grad() returns a tuple
            n_terms += 1

            if x == 0.0: # Check for boundary terms, remove this later!
                error += (funcApprox - self.T0)**2
                n_terms += 1

        return error/n_terms
        

# Initialize the Network and the Optimizer (SGD)
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
loss_fn = PINN_Loss(network, T_0, T_environment)

# Testing subroutine
test_losses = []
def test():
    network.eval()
    test_loss = 0
    correct = 0
    with pt.no_grad():
        for data in test_loader:
            test_loss += loss_fn(data)
    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
     
# In the full script the training loop goes here; this minimal example only runs the test pass
optimizer.zero_grad()
test()

This is my minimal failing example. When I run this code, I get the following RuntimeError:

Traceback (most recent call last):
  File "minimal_example.py", line 116, in <module>
    test()
  File "minimal_example.py", line 107, in test
    test_loss += loss_fn(data)
  File "/usr/local/anaconda3/envs/py36/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/anaconda3/envs/py36/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
  File "minimal_example.py", line 83, in forward
    grad_func = grad(funcApprox, x, grad_outputs=pt.ones_like(funcApprox), retain_graph=True, create_graph=False)
  File "/usr/local/anaconda3/envs/py36/lib/python3.8/site-packages/torch/autograd/__init__.py", line 411, in grad
    result = Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

I don’t understand why taking the gradient of the NN output with respect to its input fails. Does anyone know how I could fix this? I’m happy to provide additional information if required.

I would be extremely grateful if anyone could help me.

Hannes

You are explicitly disabling gradient computation with the pt.no_grad() context manager in test(). Inside that block no computation graph is recorded for funcApprox, so the grad call inside loss_fn has nothing to differentiate and raises the RuntimeError you quoted.
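
Concretely, something along these lines should work (untested sketch, so treat it as a suggestion rather than a drop-in fix). Besides removing the no_grad block, the time input x inside PINN_Loss.forward also has to require grad, because grad(funcApprox, x, ...) can only differentiate with respect to a tensor that requires grad. And if you later backpropagate this loss during training, you will want create_graph=True so the derivative term stays differentiable with respect to the network weights.

# Inside PINN_Loss.forward (sketch):
for n in range(len(inputs)):
    # make t a leaf tensor that autograd can differentiate with respect to
    x = pt.unsqueeze(inputs[n], 0).detach().requires_grad_(True)
    funcApprox = self.neural_net(x)
    # grad() returns a tuple; create_graph=True keeps dT/dt differentiable
    dT_dt = grad(funcApprox, x, grad_outputs=pt.ones_like(funcApprox),
                 create_graph=True)[0]
    error = error + (dT_dt + (funcApprox - self.T_env)) ** 2
    n_terms += 1
    if x.item() == 0.0:  # initial-condition term
        error = error + (funcApprox - self.T0) ** 2
        n_terms += 1

# And test() without pt.no_grad() (sketch):
def test():
    network.eval()
    test_loss = 0.0
    for data in test_loader:               # no pt.no_grad() here
        test_loss += loss_fn(data).item()  # .item() frees the graph after each batch
    test_loss /= len(test_loader)          # loss_fn already averages over each batch
    test_losses.append(test_loss)
    print('\nTest set: Avg. physics loss: {:.6f}\n'.format(test_loss))

network.eval() only changes layers such as dropout or batch norm (which your model does not use); it does not disable autograd, so it is fine to keep it here.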