I was trying to train a very basic RNN on Shakespeare text however I keep getting the following error and don’t know what is going wrong:
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/autograd/__init__.py:197: UserWarning: Error detected in AddmmBackward0. Traceback of forward call that caused the error:
File "/Users/aayush/Desktop/deeplearning/shakernn.py", line 99, in <module>
train_network(model, loss_func, optimizer, lr, joined_lines, device)
File "/Users/aayush/Desktop/deeplearning/shakernn.py", line 83, in train_network
hidden_state, output = model(inp, hidden_state)
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/Users/aayush/Desktop/deeplearning/shakernn.py", line 42, in forward
hidden = self.i2h(combined)
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
(Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/python_anomaly_mode.cpp:119.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
Traceback (most recent call last):
File "/Users/aayush/Desktop/deeplearning/shakernn.py", line 99, in <module>
train_network(model, loss_func, optimizer, lr, joined_lines, device)
File "/Users/aayush/Desktop/deeplearning/shakernn.py", line 88, in train_network
loss.backward()
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/_tensor.py", line 488, in backward
torch.autograd.backward(
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/autograd/__init__.py", line 197, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [200, 100]], which is output 0 of AsStridedBackward0, is at version 3; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Here is the code:
import torch
import torch.nn as nn
import torch.nn.functional as F
from pprint import pprint
device = torch.device("cpu")
torch.autograd.set_detect_anomaly(True)
class EmbeddingSubscriptable:
def __init__(self, *args, **kwargs):
self.emb = nn.Embedding(*args, **kwargs)
def __getitem__(self, idx):
return self.emb.weight[idx].unsqueeze(0)
embedding_size = 100
hidden_size = 100
with open("data/shakespeare.txt") as f:
lines = f.readlines()
joined_lines = ''.join(lines)
vocab = sorted(set(joined_lines))
vocab_size = len(vocab)
itos = dict(enumerate(vocab))
stoi = {value:key for key, value in itos.items()}
vocab_embedding = EmbeddingSubscriptable(vocab_size, embedding_size)
class RNN(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super().__init__()
self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
self.i2o = nn.Linear(input_size + hidden_size, output_size)
self.tanh = nn.Tanh()
self.softmax = nn.LogSoftmax(dim=1)
def forward(self, inp, hidden_state):
combined = torch.cat((inp, hidden_state), dim=1)
hidden = self.i2h(combined)
output = self.i2o(combined)
hidden = self.tanh(hidden)
output = self.softmax(output)
return hidden, output
def initHidden(self):
hidden = torch.nn.init.kaiming_normal_(torch.zeros(hidden_size).reshape(1, hidden_size))
return hidden
@torch.no_grad()
def predict(model, length=50, start=" "):
global vocab_embedding
hidden_state = model.initHidden()
last_c = None
for c in start:
inp = vocab_embedding[stoi[c]]
hidden_state, output = model(inp, hidden_state)
last_c = torch.argmax(output)
for i in range(length):
inp = vocab_embedding[last_c]
hidden_state, output = model(inp, hidden_state)
last_c = torch.argmax(output)
print(itos[int(last_c)], end="")
def train_network(model, loss_func, optimizer, lr, data, device):
model
chunk = 1000
curr_count = 0
hidden_state = model.initHidden()
optimizer.zero_grad()
loss = 0
for i in range(len(data) - 1):
print(i)
inp = vocab_embedding[stoi[data[i]]]
label = torch.tensor(stoi[data[i + 1]]).unsqueeze(0)
hidden_state, output = model(inp, hidden_state)
loss = loss + loss_func(output, label)
curr_count += 1
if curr_count == chunk:
loss.backward()
optimizer.step()
optimizer.zero_grad()
curr_count = 0
loss = 0
loss_func = nn.NLLLoss()
lr = 0.01
model = RNN(embedding_size, hidden_size, vocab_size)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
train_network(model, loss_func, optimizer, lr, joined_lines, device)
I know it is usually caused my in-place operations but I really can’t see anything going wrong besides the return self.emb.weight[idx].unsqueeze(0)
but even that seemed to work fine with autograd when I did a small test with it in IPython. It says its happening in the hidden = self.i2h(combined)
from what I see but I have no idea whats going wrong there either.