I am building an LSTM model for time series prediction. We start with a pandas DataFrame and load that into a TensorDataset and train the model.
I have received this error a few times and have managed to fix it in the past, however this time I just cannot find the in-place operation in question so I would really appreciate a second eye.
Before forming the Tensor dataset I did have to perform some operations to construct the dataset in pandas but I don’t think that should affect autograd, right?
Code snippets:
leads_df = leads_df.resample('6H').sum() # just a resample of df
leads_df_var = leads_df.iloc[:, :10]
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data.dataset import random_split
def generate_sequences(df, tw):
data = list()
L = len(df)
for i in range(L-tw):
sequence = df[i:i+tw].values
target = df[i+tw:i+tw+1].values
data.append((sequence, target))
return data
def list_of_tensors_to_dataset(data):
tensor_x = torch.Tensor([x[0] for x in data])
tensor_y = torch.Tensor([x[1] for x in data])
return TensorDataset(tensor_x, tensor_y)
seq_len = 4
data = generate_sequences(leads_df_var, seq_len)
dataset = list_of_tensors_to_dataset(data)
num_features = dataset[0][0].shape[1]
output_size = dataset[0][0].shape[1]
hidden_size = 128
batch_size = 16
train_len = int(len(dataset)*split)
lens = [train_len, len(dataset)-train_len]
dataloader = DataLoader(dataset, batch_size=batch_size)
Model:
class LeadsPredictor(nn.Module):
def __init__(self, num_features, seq_len, hidden_size, output_size):
super().__init__()
self.num_features = num_features
self.hidden_size = hidden_size
self.seq_len = seq_len
self.output_size = output_size
self.n_layers = 1
self.lstm = nn.LSTM(self.num_features,
self.hidden_size,
batch_first=True)
self.fc = nn.Linear(self.hidden_size*self.seq_len, self.output_size)
hidden_state = torch.zeros(self.n_layers, batch_size, self.hidden_size)
cell_state = torch.zeros(self.n_layers, batch_size, self.hidden_size)
self.hidden = (hidden_state, cell_state)
def forward(self, x):
x, self.hidden = self.lstm(x, self.hidden)
x = x.contiguous().view(batch_size,-1)
x = self.fc(x)
return x
Train function:
import time
import matplotlib.pyplot as plt
def train(model, dataloader, num_epochs):
with torch.autograd.set_detect_anomaly(True):
model.train()
losses = list()
ts = time.time()
for epoch in tqdm(range(num_epochs)):
epoch_losses = list()
for idx, (seq, label) in enumerate(dataloader):
optimizer.zero_grad()
out = model(seq)
loss = criterion(out, label.squeeze())
loss.backward(retain_graph=True)
optimizer.step()
te = time.time()
fig, ax = plt.subplots()
ax.plot(range(num_epochs), losses)
plt.show()
mins = int((te-ts) / 60)
secs = int((te-ts) % 60)
print('Training completed in {} minutes, {} seconds.'.format(mins, secs))
return losses, model
Error Message:
one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [128, 512]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
I would really reaally appreciate some help with this