I am trying to train an ensemble of neural networks in PyTorch. During the backward pass, I am encountering the following error message:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [240, 1]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead.
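As I understand this class of error, autograd keeps a version counter on every tensor it saves for the backward pass, and backward fails if such a tensor was modified in place after the forward pass. A minimal example that triggers the same error (illustration only, not from my code):

import torch

x = torch.randn(3, requires_grad=True)
y = torch.sigmoid(x)  # sigmoid's backward reuses its output y
y.add_(1.0)           # the in-place add bumps y's version counter
y.sum().backward()    # RuntimeError: ... modified by an inplace operation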
Here is the code that is causing the error:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
import numpy as np
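# NOTE: load_dataset is not defined in the snippet I pasted; the helper below is a
# minimal stand-in that maps the dataset names used later to scikit-learn loaders
# ('boston' is omitted here because load_boston was removed in scikit-learn 1.2).
def load_dataset(name):
    loaders = {
        'iris': datasets.load_iris,
        'digits': datasets.load_digits,
        'wine': datasets.load_wine,
        'breast_cancer': datasets.load_breast_cancer,
        'diabetes': datasets.load_diabetes,
    }
    data = loaders[name]()
    return data.data, data.target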
# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Load the dataset (possible values: 'iris', 'digits', 'wine', 'breast_cancer', 'diabetes', 'boston')
dataset_name = 'breast_cancer'
X, y = load_dataset(dataset_name)
# Shuffle and split the dataset into training and testing sets
X, y = shuffle(X, y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Standardize the data (zero mean, unit variance)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Convert NumPy arrays to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.long).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.long).to(device)
# Define a simple neural network
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(hidden_size, hidden_size)
        self.relu4 = nn.ReLU()
        self.fc5 = nn.Linear(hidden_size, hidden_size)
        self.relu5 = nn.ReLU()
        self.fc6 = nn.Linear(hidden_size, hidden_size)
        self.relu6 = nn.ReLU()
        self.fc7 = nn.Linear(hidden_size, hidden_size)
        self.relu7 = nn.ReLU()
        self.fc8 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.relu3(self.fc3(x))
        x = self.relu4(self.fc4(x))
        x = self.relu5(self.fc5(x))
        x = self.relu6(self.fc6(x))
        x = self.relu7(self.fc7(x))
        return self.fc8(x)
# Create a list of neural network instances
input_size = X.shape[1]
hidden_size = input_size * 8
output_size = 1
ensemble_size = 20
ensemble = [SimpleNN(input_size, hidden_size, output_size).to(device) for i in range(ensemble_size)]
# Enable anomaly detection (detect_anomaly() on its own line is a no-op context
# manager; set_detect_anomaly(True) enables it globally)
torch.autograd.set_detect_anomaly(True)
# Specify the number of epochs
epochs = 7500
# Binary entropy (in bits), with eps to avoid log(0)
def entropy(p):
    eps = 1e-9
    return -p * torch.log2(p + eps) - (1 - p) * torch.log2(1 - p + eps)

# Steep sigmoid that approximates a hard 0/1 threshold at 0.5
def step_sigmoid(x):
    return torch.sigmoid(1000 * (x - 0.5))

# Soft majority vote across ensemble members (axis 1)
def voted_predict(y_pred):
    return step_sigmoid(torch.mean(step_sigmoid(y_pred), axis=1))
# Compute the indicator function I (squared error of the majority vote)
def indicator(y_pred, y_true):
    return (voted_predict(y_pred) - y_true) ** 2

# Calculate the fraction of ensemble members that vote positive for each sample
def positive_probability(y_pred):
    num_pos = torch.sum(step_sigmoid(y_pred), axis=1)
    total = y_pred.shape[1]
    p_pos = num_pos / total
    return p_pos
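# Quick illustration (not part of the training script): with logits from 4 models
# for 3 samples, step_sigmoid maps each logit to a near-0/1 vote and
# positive_probability returns the fraction of positive votes per sample:
#   y_demo = torch.tensor([[ 2.0,  1.0, -1.0,  3.0],
#                          [-2.0, -1.0, -3.0,  1.0],
#                          [ 1.0, -1.0,  1.0, -1.0]])
#   positive_probability(y_demo)  # -> tensor([0.7500, 0.2500, 0.5000])
#   voted_predict(y_demo)         # -> approximately tensor([1.0, 0.0, 0.5])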
def concatenate_predictions(ens_y_pred, y_pred):
    if ens_y_pred is None:
        ens_y_pred = y_pred.clone()
    else:
        # Create a new tensor that holds the concatenated values
        ens_y_pred = torch.cat([ens_y_pred, y_pred], dim=1)
    return ens_y_pred
# Define the custom loss function
def custom_loss(y_true, y_pred):
    # Optionally copy the inputs (the .clone().detach() calls are currently commented out)
    y_pred_copy = y_pred  # .clone().detach()
    y_true_copy = y_true  # .clone().detach()
    I = indicator(y_pred_copy, y_true_copy).detach().clone()
    print("I in custom_loss:", I)
    # Calculate the positive-vote probability
    p_pos = positive_probability(y_pred_copy)
    print("p_pos in custom_loss:", p_pos)
    # Compute the entropy of the predicted probabilities
    H = entropy(p_pos)  # .detach().clone()
    print("H in custom_loss:", H)
    # Compute the loss
    loss = I + torch.pow(H, 1 - I) * torch.pow(1 - H, I)
    print("loss in custom_loss:", loss)
    print("torch.mean(loss):", torch.mean(loss))
    return torch.mean(loss)
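# Sanity check on the loss formula: for I = 0 it reduces to H (rewarding a
# confident, correct consensus), and for I = 1 it reduces to 1 + (1 - H)
# (rewarding a higher-entropy, i.e. more split, vote when the ensemble is wrong).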
optimizer = optim.SGD([{'params': model.parameters()} for model in ensemble], lr=0.01)

# Initialize the TensorBoard writer
writer = SummaryWriter()

# Train the ensemble of neural networks
prev_weights = {}
for epoch in range(epochs):
    if epoch % 100 == 0:
        print("epoch:", epoch)
    # Shuffle the training data for each epoch
    X_train_shuffled, y_train_shuffled = shuffle(X_train.clone(), y_train.clone(), random_state=epoch)
    # Initialize the ensemble predictions
    ens_y_pred = None
    # Run a forward pass through each neural network in the ensemble
    for i, model in enumerate(ensemble):
        # Set the model to training mode
        model.train()
        # Forward pass
        y_pred = model(X_train)
        print("y_pred.shape:", y_pred.shape)
        # Concatenate the predictions along dim=1
        ens_y_pred = concatenate_predictions(ens_y_pred, y_pred)
    # Reshape the concatenated predictions
    ens_y_pred = ens_y_pred.reshape([ens_y_pred.shape[1], ens_y_pred.shape[0]])
    print("ens_y_pred.shape:", ens_y_pred.shape)
    # Train each neural network in the ensemble
    for i, model in enumerate(ensemble):
        print('i:', i)
        # Calculate the loss
        loss = custom_loss(y_train, ens_y_pred)
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
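For comparison, the textbook pattern of training each ensemble member independently (one loss, one backward, one optimizer step per model, with no shared graph) should not hit this error as far as I understand, since each backward consumes its own fresh graph. So I suspect the problem comes from reusing the combined graph across twenty backward calls with retain_graph=True. Here is a sketch of that independent variant (BCEWithLogitsLoss is just my placeholder here; my real objective is the custom loss above):

criterion = nn.BCEWithLogitsLoss()
optimizers = [optim.SGD(m.parameters(), lr=0.01) for m in ensemble]
for epoch in range(epochs):
    for model, opt in zip(ensemble, optimizers):
        model.train()
        opt.zero_grad()
        logits = model(X_train).squeeze(1)         # shape [N]
        loss = criterion(logits, y_train.float())  # per-model loss, fresh graph
        loss.backward()
        opt.step()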