I encounter the following problem using the tensor.index_put_() method. My code has roughly the following structure
auto sample_points = get_sample_points(); // torch::Tensor
auto samples = get_samples(); // torch::Tensor
// Create memory
auto rhs = torch::empty_like(sample_points.view({1,-1}));
auto out = torch::empty_like(sample_points.view({1,-1}));
// Generate labels
for (int64_t i=0; i<rhs.size(1); ++i)
rhs.index_put_({0, i}, compute_label( sample_points[0].index({i}).flatten() ) );
for (int64_t epoch = 0; epoch != options_.max_epoch(); ++epoch)
{
// Reset gradients
net_->zero_grad();
// Execute the model on the input data
auto pred = net_->forward(samples);
// Generate predictions
for (int64_t i=0; i<out.size(1); ++i)
out.index_put_({0, i}, pred.index({0, i})); // in the real implementation it is compute_pred( pred.index({0, i}) )
// Compute the loss value
auto loss = torch::mse_loss( out , rhs );
// Compute gradients of the loss w.r.t. the model parameters
loss.backward({}, true, true);
// Update the parameters based on the calculated gradients
opt_.step();
}
The first iteration works well but the second one gives the following error:
libc++abi: terminating with uncaught exception of type c10::Error: one of the variables needed for gradient computation has been modified by an inplace operation: [CPUFloatType [100, 5]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Exception raised from unpack at /tmp/libtorch-20220703-3578-yic0sn/torch/csrc/autograd/saved_variable.cpp:164 (most recent call first):
It is the index_put_ that breaks the gradient calculation. The problem is that the compute_label and compute_pred function assume Tensor objects with a single entry (i.e. they are implemented as scalar functions).
Could you post a minimal, executable code snippet defining out as well as pred so that we could see what the expected output would be?
You are right that index_put_ is most likely causing the issue as it’s an inplace operation and seems to manipulate a tensor inplace which is needed for the gradient calculation.
#include <torch/torch.h>
// Define a new Module.
struct Net : torch::nn::Module {
Net() {
// Construct and register two Linear submodules.
fc1 = register_module("fc1", torch::nn::Linear(11, 64));
fc2 = register_module("fc2", torch::nn::Linear(64, 32));
fc3 = register_module("fc3", torch::nn::Linear(32, 11));
}
// Implement the Net's algorithm.
torch::Tensor forward(torch::Tensor x) {
// Use one of many tensor manipulation functions.
x = torch::relu(fc1->forward(x));
x = torch::relu(fc2->forward(x));
x = torch::relu(fc3->forward(x));
return x;
}
// Use one of many "standard library" modules.
torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr};
};
int main()
{
torch::autograd::AnomalyMode::set_enabled(true);
// Create samples
auto samples = torch::linspace(0, 10, 11);
// Create memory
auto rhs = torch::empty_like(samples.view({1,-1}));
auto out = torch::empty_like(samples.view({1,-1}));
// Generate labels
for (int64_t i=0; i<rhs.size(1); ++i)
rhs.index_put_({0, i}, samples.index({i}));
// REMARK: Replacing the above by the line below fixes the error
// rhs = samples;
// Create a new Net
auto net = std::make_shared<Net>();
// Create a new Adam optimizer
torch::optim::Adam optimizer(net->parameters());
for (int64_t epoch = 0; epoch != 2; ++epoch)
{
std::cout << "Epoch " << std::to_string(epoch) << std::endl;
// Reset gradients
net->zero_grad();
// Execute the model on the input data
auto pred = net->forward(samples);
// Generate predictions
for (int64_t i=0; i<out.size(1); ++i)
out.index_put_({0, i}, pred.index({i}));
// REMARK: Replacing the above by the line below fixes the error
// out = pred;
// Compute the loss value
auto loss = torch::mse_loss( out , rhs );
// Compute gradients of the loss w.r.t. the model parameters
loss.backward({}, true, true);
// Update the parameters based on the calculated gradients
optimizer.step();
}
}
As you can see it does not even depend on the way pred and out are computed. Already the use of index_put_ leads to the error.